Blob Blame History Raw
From 22936cd8ed0f0d5b0c3ece75752e2877f46af455 Mon Sep 17 00:00:00 2001
From: Patrick Uiterwijk <puiterwijk@redhat.com>
Date: Wed, 21 Sep 2016 13:48:07 +0000
Subject: [PATCH 1/3] Rewrite http.py to use python-requests

This will work around an issue when pycurl is used in the same context as
rpm is, as used by koji-containerbuild.

Signed-off-by: Patrick Uiterwijk <puiterwijk@redhat.com>
---
 osbs/http.py | 290 +++++++++--------------------------------------------------
 1 file changed, 42 insertions(+), 248 deletions(-)

diff --git a/osbs/http.py b/osbs/http.py
index 9955c55..5f39141 100644
--- a/osbs/http.py
+++ b/osbs/http.py
@@ -24,77 +24,17 @@
 import logging
 from io import BytesIO
 
-import pycurl
-
 from osbs.exceptions import OsbsException, OsbsNetworkException, OsbsResponseException
 
+import requests
 try:
-    # py2
-    import httplib
+    from requests_kerberos import HTTPKerberosAuth
 except ImportError:
-    # py3
-    import http.client as httplib
+    HTTPKerberosAuth = None
 
 
-logger = logging.getLogger(__name__)
 
-SELECT_TIMEOUT = 9999
-PYCURL_NETWORK_CODES = [pycurl.E_BAD_CONTENT_ENCODING,
-                        pycurl.E_BAD_DOWNLOAD_RESUME,
-                        pycurl.E_CONV_FAILED,
-                        pycurl.E_CONV_REQD,
-                        pycurl.E_COULDNT_CONNECT,
-                        pycurl.E_COULDNT_RESOLVE_HOST,
-                        pycurl.E_COULDNT_RESOLVE_PROXY,
-                        pycurl.E_FILESIZE_EXCEEDED,
-                        pycurl.E_HTTP_POST_ERROR,
-                        pycurl.E_HTTP_RANGE_ERROR,
-                        pycurl.E_HTTP_RETURNED_ERROR,
-                        pycurl.E_LOGIN_DENIED,
-                        # old pycurl: E_OPERATION_TIMEOUTED, new pycurl: E_OPERATION_TIMEDOUT
-                        getattr(pycurl, "E_OPERATION_TIMEDOUT", "E_OPERATION_TIMEOUTED"),
-                        pycurl.E_PARTIAL_FILE,
-                        pycurl.E_READ_ERROR,
-                        pycurl.E_RECV_ERROR,
-                        pycurl.E_REMOTE_FILE_NOT_FOUND,
-                        pycurl.E_SEND_ERROR,
-                        pycurl.E_SSL_CACERT,
-                        pycurl.E_SSL_CERTPROBLEM,
-                        pycurl.E_SSL_CIPHER,
-                        pycurl.E_SSL_CONNECT_ERROR,
-                        pycurl.E_SSL_PEER_CERTIFICATE,
-                        pycurl.E_SSL_SHUTDOWN_FAILED,
-                        pycurl.E_TOO_MANY_REDIRECTS,
-                        pycurl.E_UNSUPPORTED_PROTOCOL,
-                        pycurl.E_WRITE_ERROR]
-
-PYCURL_NETWORK_CODES = [x for x in PYCURL_NETWORK_CODES if x is not None]
-
-
-def parse_headers(all_headers):
-    # all_headers contains headers from all responses - even without FOLLOWLOCATION there
-    # might be multiple sets of headers due to 401 Unauthorized. We only care about the last
-    # response.
-    try:
-        raw_headers = all_headers.split(b"\r\n\r\n")[-2]
-    except IndexError:
-        logger.warning('Incorrectly terminated http headers')
-        raw_headers = all_headers
-
-    logger.debug("raw headers: " + repr(raw_headers))
-
-    # http://stackoverflow.com/questions/24728088/python-parse-http-response-string/24729316#24729316
-    class FakeSocket(object):
-        def __init__(self, response_str):
-            self._file = BytesIO(response_str)
-
-        def makefile(self, *args, **kwargs):
-            return self._file
-
-    response = httplib.HTTPResponse(FakeSocket(raw_headers))
-    response.begin()
-    header_list = [(k.lower(), v) for (k, v) in response.getheaders()]
-    return dict(header_list)
+logger = logging.getLogger(__name__)
 
 
 class HttpSession(object):
@@ -120,22 +60,9 @@ def request(self, url, *args, **kwargs):
                 return stream
 
             with stream as s:
-                # joining at once is much faster than doing += in a loop
-                all_chunks = list(s.iter_chunks())
-                content = ''.join(all_chunks)
+                content = s.req.content
                 return HttpResponse(s.status_code, s.headers, content)
-        except pycurl.error as ex:
-            code = ex.args[0]
-            try:
-                message = ex.args[1]
-            except IndexError:
-                # happened on rhel 6
-                message = ""
-            if code in PYCURL_NETWORK_CODES:
-                raise OsbsNetworkException(url, message, code, *ex.args[2:],
-                                           cause=ex,
-                                           traceback=sys.exc_info()[2])
-
+        except Exception as ex:
             raise OsbsException(cause=ex, traceback=sys.exc_info()[2])
 
 
@@ -160,203 +87,68 @@ def __init__(self, url, method, data=None, kerberos_auth=False,
 
         self.status_code = 0
         self.headers = None
-        self.response_buffer = BytesIO()
-        self.headers_buffer = BytesIO()
-        self.response_decoder = None
 
         self.url = url
         headers = headers or {}
         method = method.lower()
 
-        self.c = pycurl.Curl()
-        self.curl_multi = pycurl.CurlMulti()
-
-        if method == 'post':
-            self.c.setopt(pycurl.POST, 1)
-            headers["Expect"] = ""  # openshift can't handle Expect
-        elif method == 'get':
-            self.c.setopt(pycurl.HTTPGET, 1)
-        elif method == 'put':
-            # self.c.setopt(pycurl.PUT, 1)
-            self.c.setopt(pycurl.CUSTOMREQUEST, b"PUT")
-            headers["Expect"] = ""
-        elif method == 'delete':
-            self.c.setopt(pycurl.CUSTOMREQUEST, b"DELETE")
-        else:
+        if method not in ['post', 'get', 'put', 'delete']:
             raise RuntimeError("Unsupported method '%s' for curl call!" % method)
 
-        self.c.setopt(pycurl.COOKIEFILE, b'')
-        self.c.setopt(pycurl.URL, str(url))
-        self.c.setopt(pycurl.WRITEFUNCTION, self.response_buffer.write)
-        self.c.setopt(pycurl.HEADERFUNCTION, self.headers_buffer.write)
-        self.c.setopt(pycurl.DEBUGFUNCTION, self._curl_debug)
-        self.c.setopt(pycurl.SSL_VERIFYPEER, 1 if verify_ssl else 0)
-        self.c.setopt(pycurl.SSL_VERIFYHOST, 2 if verify_ssl else 0)
-        if ca:
-            logger.info("Setting CAINFO to %r", ca)
-            self.c.setopt(pycurl.CAINFO, ca)
-
-        self.c.setopt(pycurl.VERBOSE, 1 if verbose else 0)
+        args = {}
+
+        if method in ['post', 'put']:
+            headers['Expect'] = ''
+
+        if not verify_ssl:
+            args['verify'] = False
+        else:
+            if ca:
+                args['verify'] = ca
+            else:
+                args['verify'] = True
+
         if username and password:
-            username = username.encode('utf-8')
-            password = password.encode('utf-8')
-            self.c.setopt(pycurl.USERPWD, username + b":" + password)
+            args['auth'] = (username, password)
 
         if client_cert and client_key:
-            self.c.setopt(pycurl.SSLCERTTYPE, "PEM")
-            self.c.setopt(pycurl.SSLKEYTYPE, "PEM")
-            self.c.setopt(pycurl.SSLCERT, client_cert)
-            self.c.setopt(pycurl.SSLKEY, client_key)
+            args['cert'] = (client_cert, client_key)
 
         if data:
-            # curl sets the method to post if one sets any POSTFIELDS (even '')
-            self.c.setopt(pycurl.POSTFIELDS, data)
+            args['data'] = data
 
         if use_json:
-            headers['Content-Type'] = b'application/json'
+            headers['Content-Type'] = 'application/json'
 
-        if allow_redirects:
-            self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+        args['allow_redirects'] = allow_redirects
 
         if kerberos_auth:
-            self.c.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_GSSNEGOTIATE)
-            self.c.setopt(pycurl.USERPWD, b':')
+            if not HTTPKerberosAuth:
+                raise RuntimeError('Kerberos auth unavailable')
+            args['auth'] = HTTPKerberosAuth()
 
         if stream:
-            headers['Cache-Control'] = b'no-cache'
-
-        if headers:
-            header_list = []
-            for header_key, header_value in headers.items():
-                header_list.append(str("%s: %s" % (header_key, header_value)))
-            self.c.setopt(pycurl.HTTPHEADER, header_list)
-
-        self.curl_multi.add_handle(self.c)
-
-        # Send request and read all headers. We have all headers once we receive some data or once
-        # the response ends.
-        # NOTE: HTTP response in chunked encoding can contain additional headers ("trailers") in the
-        # last chunk. This is not handled here.
-        while not (self.finished or self._any_data_received()):
-            self._select()
-            self._perform()
-
-        self.headers = parse_headers(self.headers_buffer.getvalue())
-        self.status_code = self.c.getinfo(pycurl.HTTP_CODE)
-        self.response_decoder = codecs.getincrementaldecoder(self.encoding)()
-
-    def _perform(self):
-        while True:
-            ret, num_handles = self.curl_multi.perform()
-            if ret != pycurl.E_CALL_MULTI_PERFORM:
-                # see curl_multi_perform manpage
-                break
-
-        num_q, _, err_list = self.curl_multi.info_read()
-        if num_q != 0:
-            logger.warning("CurlMulti.info_read() has %s remaining messages", num_q)
-
-        if err_list:
-            err_obj = err_list[0]
-
-            # For anything but the connection being closed, raise
-            if err_obj[1] != pycurl.E_PARTIAL_FILE:
-                raise OsbsNetworkException(self.url, err_obj[2], err_obj[1])
-
-        self.finished = (num_handles == 0)
-
-    def _select(self):
-        sel = self.curl_multi.select(SELECT_TIMEOUT)
-        if sel == -1:
-            raise OsbsException("CurlMulti.select() timed out")
-        elif sel == 0:
-            # sel==0 means curl_multi_fdset returned -1
-            # manual page suggests sleeping >100ms when this happens:(
-            time.sleep(0.1)
-
-    def _any_data_received(self):
-        return self.response_buffer.tell() != 0
+            args['stream'] = True
+
+        args['headers'] = headers
+        self.req = requests.request(method, url, **args)
+
+        self.headers = self.req.headers
+        self.status_code = self.req.status_code
 
     def _get_received_data(self):
-        result = self.response_buffer.getvalue()
-        self.response_buffer.truncate(0)
-        self.response_buffer.seek(0)
-        return self.response_decoder.decode(result, final=self.finished)
+        return self.req.content
 
     def iter_chunks(self):
-        while True:
-            self._perform()
-            if self._any_data_received():
-                yield self._get_received_data()
-            if self.finished:
-                break
-            self._select()
-
-        logger.debug("end of the stream")
-        self.close()
+        return self.req.iter_content(None)
 
     def iter_lines(self):
-        chunks = self.iter_chunks()
-        return self._split_lines_from_chunks(chunks)
-
-    @staticmethod
-    def _split_lines_from_chunks(chunks):
-        # same behaviour as requests' Response.iter_lines(...)
-
-        pending = None
-        for chunk in chunks:
-
-            if pending is not None:
-                chunk = pending + chunk
-            lines = chunk.splitlines()
-
-            if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
-                pending = lines.pop()
-            else:
-                pending = None
-
-            for line in lines:
-                yield line
-
-        if pending is not None:
-            yield pending
-
-    @staticmethod
-    def _curl_debug(debug_type, debug_msg):
-        try:
-            logger_name = {
-                pycurl.INFOTYPE_TEXT: 'curl',
-                pycurl.INFOTYPE_HEADER_IN: 'in',
-                pycurl.INFOTYPE_HEADER_OUT: 'out'
-            }[debug_type]
-        except KeyError:
-            return
-
-        curl_logger = logging.getLogger(__name__ + '.' + logger_name)
-        for line in debug_msg.splitlines():
-            if not line:
-                continue
-            curl_logger.debug(line)
-
-    @property
-    def encoding(self):
-        encoding = None
-        if 'content-type' in self.headers:
-            content_type = self.headers['content-type'].lower()
-            match = re.search(r'charset=(\S+)', content_type)
-            if match:
-                encoding = match.group(1)
-        if encoding is None:
-            encoding = 'utf-8'  # assume utf-8
-
-        return encoding
+        return self.req.iter_lines()
 
     def close(self):
         if not self.closed:
             logger.debug("cleaning up")
-            self.curl_multi.remove_handle(self.c)
-            self.c.close()
-            self.curl_multi.close()
+            del self.req
         self.closed = True
 
     def __del__(self):
@@ -376,7 +168,9 @@ def __init__(self, status_code, headers, content):
         self.content = content
 
     def json(self, check=True):
-        if check and self.status_code not in (0, httplib.OK, httplib.CREATED):
+        if check and self.status_code not in (0, requests.codes.OK, requests.codes.CREATED):
             raise OsbsResponseException(self.content, self.status_code)
 
+        if isinstance(self.content, bytes):
+            self.content = self.content.decode('utf-8')
         return json.loads(self.content)

From b626346936a0cd12cdaed028152311f70afcdc41 Mon Sep 17 00:00:00 2001
From: Patrick Uiterwijk <puiterwijk@redhat.com>
Date: Wed, 21 Sep 2016 14:13:25 +0000
Subject: [PATCH 2/3] Fix tests for porting of http client to requests

Signed-off-by: Patrick Uiterwijk <puiterwijk@redhat.com>
---
 tests/test_http.py | 58 +-----------------------------------------------------
 1 file changed, 1 insertion(+), 57 deletions(-)

diff --git a/tests/test_http.py b/tests/test_http.py
index 674f9eb..30d2f14 100644
--- a/tests/test_http.py
+++ b/tests/test_http.py
@@ -12,7 +12,7 @@
 import pytest
 
 import osbs.http as osbs_http
-from osbs.http import parse_headers, HttpSession, HttpStream
+from osbs.http import HttpSession, HttpStream
 from osbs.exceptions import OsbsNetworkException
 
 from tests.fake_api import Connection, ResponseMapping
@@ -34,22 +34,6 @@ def has_connection():
         return False
 
 
-class TestParseHeaders(object):
-    def test_parse_headers(self):
-        conn = Connection("0.5.4")
-        rm = ResponseMapping("0.5.4", lookup=conn.get_definition_for)
-
-        key, value = conn.get_definition_for("/oauth/authorize")
-        file_name = value["get"]["file"]
-        raw_headers = rm.get_response_content(file_name)
-
-        headers = parse_headers(raw_headers)
-
-        assert headers is not None
-        assert len(headers.items()) > 0
-        assert headers["location"]
-
-
 @pytest.mark.skipif(not has_connection(),
                     reason="requires internet connection")
 class TestHttpSession(object):
@@ -141,43 +125,3 @@ def test_raise(self, s):
             with s.get("http://httpbin.org/stream/3", stream=True) as s:
                 raise RuntimeError("hi")
         assert s.closed
-
-
-class TestHttpStream(object):
-    @pytest.mark.parametrize('chunks,expected_content', [
-        ([b'foo', b'', b'bar', b'baz'], u'foobarbaz'),
-        ([b'a', b'b', b'\xc4', b'\x8d', b'x'], u'ab\u010dx'),
-        ([b'\xe2', b'\x8a', b'\x86'], u'\u2286'),
-        ([b'\xe2\x8a', b'\x86'], u'\u2286'),
-        ([b'\xe2', b'\x8a\x86'], u'\u2286'),
-        ([b'aaaa', b'\xe2\x8a', b'\x86'], u'aaaa\u2286'),
-        ([b'aaaa\xe2\x8a', b'\x86'], u'aaaa\u2286'),
-        ([b'\xe2\x8a', b'\x86ffff'], u'\u2286ffff'),
-    ])
-    def test_http_multibyte_decoding(self, chunks, expected_content):
-        class Whatever(object):
-            def __getattr__(self, name):
-                return self
-
-            def __call__(self, *args, **kwargs):
-                return self
-        flexmock(pycurl).should_receive('Curl').and_return(Whatever())
-        flexmock(pycurl).should_receive('CurlMulti').and_return(Whatever())
-        (flexmock(osbs_http).should_receive('parse_headers')
-                            .and_return({'content-type': 'application/json; charset=utf-8'}))
-        flexmock(HttpStream, _select=lambda: None)
-
-        def mock_perform(self):
-            if chunks:
-                self.response_buffer.write(chunks.pop(0))
-            else:
-                self.finished = True
-
-        try:
-            orig_perform = HttpStream._perform
-            HttpStream._perform = mock_perform
-
-            r = HttpSession(verbose=True).get('http://')
-            assert r.content == expected_content
-        finally:
-            HttpStream._perform = orig_perform

From cdd2061d6397e159bb32413b39e538be7685b505 Mon Sep 17 00:00:00 2001
From: Patrick Uiterwijk <puiterwijk@redhat.com>
Date: Wed, 21 Sep 2016 14:27:16 +0000
Subject: [PATCH 3/3] Update spec file to include python-requests

Signed-off-by: Patrick Uiterwijk <puiterwijk@redhat.com>
---
 osbs-client.spec | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/osbs-client.spec b/osbs-client.spec
index 298aadf..c0b3cd5 100644
--- a/osbs-client.spec
+++ b/osbs-client.spec
@@ -60,7 +60,8 @@ BuildRequires:  python-pytest-capturelog
 BuildRequires:  python-flexmock
 BuildRequires:  python-six
 BuildRequires:  python-dockerfile-parse
-BuildRequires:  python-pycurl
+BuildRequires:  python-requests
+BuildRequires:  python-requests-kerberos
 %endif # with_check
 
 %if 0%{?with_python3}
@@ -73,7 +74,8 @@ BuildRequires:  python3-pytest-capturelog
 BuildRequires:  python3-flexmock
 BuildRequires:  python3-six
 BuildRequires:  python3-dockerfile-parse
-BuildRequires:  python3-pycurl
+BuildRequires:  python3-requests
+BuildRequires:  python3-requests-kerberos
 %endif # with_check
 %endif # with_python3
 
@@ -91,7 +93,8 @@ Summary:        Python 2 module for OpenShift Build Service
 Group:          Development/Tools
 License:        BSD
 Requires:       python-dockerfile-parse
-Requires:       python-pycurl
+Requires:       python-requests
+Requires:       python-requests-kerberos
 Requires:       python-setuptools
 Requires:       krb5-workstation
 %if 0%{?rhel} && 0%{?rhel} <= 6
@@ -113,7 +116,8 @@ Summary:        Python 3 module for OpenShift Build Service
 Group:          Development/Tools
 License:        BSD
 Requires:       python3-dockerfile-parse
-Requires:       python3-pycurl
+Requires:       python3-requests
+Requires:       python3-requests-kerberos
 Requires:       python3-dateutil
 Requires:       python3-setuptools
 Requires:       krb5-workstation