From 22936cd8ed0f0d5b0c3ece75752e2877f46af455 Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Wed, 21 Sep 2016 13:48:07 +0000 Subject: [PATCH 1/3] Rewrite http.py to use python-requests This will work around an issue when pycurl is used in the same context as rpm is, as used by koji-containerbuild. Signed-off-by: Patrick Uiterwijk --- osbs/http.py | 290 +++++++++-------------------------------------------------- 1 file changed, 42 insertions(+), 248 deletions(-) diff --git a/osbs/http.py b/osbs/http.py index 9955c55..5f39141 100644 --- a/osbs/http.py +++ b/osbs/http.py @@ -24,77 +24,17 @@ import logging from io import BytesIO -import pycurl - from osbs.exceptions import OsbsException, OsbsNetworkException, OsbsResponseException +import requests try: - # py2 - import httplib + from requests_kerberos import HTTPKerberosAuth except ImportError: - # py3 - import http.client as httplib + HTTPKerberosAuth = None -logger = logging.getLogger(__name__) -SELECT_TIMEOUT = 9999 -PYCURL_NETWORK_CODES = [pycurl.E_BAD_CONTENT_ENCODING, - pycurl.E_BAD_DOWNLOAD_RESUME, - pycurl.E_CONV_FAILED, - pycurl.E_CONV_REQD, - pycurl.E_COULDNT_CONNECT, - pycurl.E_COULDNT_RESOLVE_HOST, - pycurl.E_COULDNT_RESOLVE_PROXY, - pycurl.E_FILESIZE_EXCEEDED, - pycurl.E_HTTP_POST_ERROR, - pycurl.E_HTTP_RANGE_ERROR, - pycurl.E_HTTP_RETURNED_ERROR, - pycurl.E_LOGIN_DENIED, - # old pycurl: E_OPERATION_TIMEOUTED, new pycurl: E_OPERATION_TIMEDOUT - getattr(pycurl, "E_OPERATION_TIMEDOUT", "E_OPERATION_TIMEOUTED"), - pycurl.E_PARTIAL_FILE, - pycurl.E_READ_ERROR, - pycurl.E_RECV_ERROR, - pycurl.E_REMOTE_FILE_NOT_FOUND, - pycurl.E_SEND_ERROR, - pycurl.E_SSL_CACERT, - pycurl.E_SSL_CERTPROBLEM, - pycurl.E_SSL_CIPHER, - pycurl.E_SSL_CONNECT_ERROR, - pycurl.E_SSL_PEER_CERTIFICATE, - pycurl.E_SSL_SHUTDOWN_FAILED, - pycurl.E_TOO_MANY_REDIRECTS, - pycurl.E_UNSUPPORTED_PROTOCOL, - pycurl.E_WRITE_ERROR] - -PYCURL_NETWORK_CODES = [x for x in PYCURL_NETWORK_CODES if x is not None] - - -def parse_headers(all_headers): - # all_headers contains headers from all responses - even without FOLLOWLOCATION there - # might be multiple sets of headers due to 401 Unauthorized. We only care about the last - # response. - try: - raw_headers = all_headers.split(b"\r\n\r\n")[-2] - except IndexError: - logger.warning('Incorrectly terminated http headers') - raw_headers = all_headers - - logger.debug("raw headers: " + repr(raw_headers)) - - # http://stackoverflow.com/questions/24728088/python-parse-http-response-string/24729316#24729316 - class FakeSocket(object): - def __init__(self, response_str): - self._file = BytesIO(response_str) - - def makefile(self, *args, **kwargs): - return self._file - - response = httplib.HTTPResponse(FakeSocket(raw_headers)) - response.begin() - header_list = [(k.lower(), v) for (k, v) in response.getheaders()] - return dict(header_list) +logger = logging.getLogger(__name__) class HttpSession(object): @@ -120,22 +60,9 @@ def request(self, url, *args, **kwargs): return stream with stream as s: - # joining at once is much faster than doing += in a loop - all_chunks = list(s.iter_chunks()) - content = ''.join(all_chunks) + content = s.req.content return HttpResponse(s.status_code, s.headers, content) - except pycurl.error as ex: - code = ex.args[0] - try: - message = ex.args[1] - except IndexError: - # happened on rhel 6 - message = "" - if code in PYCURL_NETWORK_CODES: - raise OsbsNetworkException(url, message, code, *ex.args[2:], - cause=ex, - traceback=sys.exc_info()[2]) - + except Exception as ex: raise OsbsException(cause=ex, traceback=sys.exc_info()[2]) @@ -160,203 +87,68 @@ def __init__(self, url, method, data=None, kerberos_auth=False, self.status_code = 0 self.headers = None - self.response_buffer = BytesIO() - self.headers_buffer = BytesIO() - self.response_decoder = None self.url = url headers = headers or {} method = method.lower() - self.c = pycurl.Curl() - self.curl_multi = pycurl.CurlMulti() - - if method == 'post': - self.c.setopt(pycurl.POST, 1) - headers["Expect"] = "" # openshift can't handle Expect - elif method == 'get': - self.c.setopt(pycurl.HTTPGET, 1) - elif method == 'put': - # self.c.setopt(pycurl.PUT, 1) - self.c.setopt(pycurl.CUSTOMREQUEST, b"PUT") - headers["Expect"] = "" - elif method == 'delete': - self.c.setopt(pycurl.CUSTOMREQUEST, b"DELETE") - else: + if method not in ['post', 'get', 'put', 'delete']: raise RuntimeError("Unsupported method '%s' for curl call!" % method) - self.c.setopt(pycurl.COOKIEFILE, b'') - self.c.setopt(pycurl.URL, str(url)) - self.c.setopt(pycurl.WRITEFUNCTION, self.response_buffer.write) - self.c.setopt(pycurl.HEADERFUNCTION, self.headers_buffer.write) - self.c.setopt(pycurl.DEBUGFUNCTION, self._curl_debug) - self.c.setopt(pycurl.SSL_VERIFYPEER, 1 if verify_ssl else 0) - self.c.setopt(pycurl.SSL_VERIFYHOST, 2 if verify_ssl else 0) - if ca: - logger.info("Setting CAINFO to %r", ca) - self.c.setopt(pycurl.CAINFO, ca) - - self.c.setopt(pycurl.VERBOSE, 1 if verbose else 0) + args = {} + + if method in ['post', 'put']: + headers['Expect'] = '' + + if not verify_ssl: + args['verify'] = False + else: + if ca: + args['verify'] = ca + else: + args['verify'] = True + if username and password: - username = username.encode('utf-8') - password = password.encode('utf-8') - self.c.setopt(pycurl.USERPWD, username + b":" + password) + args['auth'] = (username, password) if client_cert and client_key: - self.c.setopt(pycurl.SSLCERTTYPE, "PEM") - self.c.setopt(pycurl.SSLKEYTYPE, "PEM") - self.c.setopt(pycurl.SSLCERT, client_cert) - self.c.setopt(pycurl.SSLKEY, client_key) + args['cert'] = (client_cert, client_key) if data: - # curl sets the method to post if one sets any POSTFIELDS (even '') - self.c.setopt(pycurl.POSTFIELDS, data) + args['data'] = data if use_json: - headers['Content-Type'] = b'application/json' + headers['Content-Type'] = 'application/json' - if allow_redirects: - self.c.setopt(pycurl.FOLLOWLOCATION, 1) + args['allow_redirects'] = allow_redirects if kerberos_auth: - self.c.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_GSSNEGOTIATE) - self.c.setopt(pycurl.USERPWD, b':') + if not HTTPKerberosAuth: + raise RuntimeError('Kerberos auth unavailable') + args['auth'] = HTTPKerberosAuth() if stream: - headers['Cache-Control'] = b'no-cache' - - if headers: - header_list = [] - for header_key, header_value in headers.items(): - header_list.append(str("%s: %s" % (header_key, header_value))) - self.c.setopt(pycurl.HTTPHEADER, header_list) - - self.curl_multi.add_handle(self.c) - - # Send request and read all headers. We have all headers once we receive some data or once - # the response ends. - # NOTE: HTTP response in chunked encoding can contain additional headers ("trailers") in the - # last chunk. This is not handled here. - while not (self.finished or self._any_data_received()): - self._select() - self._perform() - - self.headers = parse_headers(self.headers_buffer.getvalue()) - self.status_code = self.c.getinfo(pycurl.HTTP_CODE) - self.response_decoder = codecs.getincrementaldecoder(self.encoding)() - - def _perform(self): - while True: - ret, num_handles = self.curl_multi.perform() - if ret != pycurl.E_CALL_MULTI_PERFORM: - # see curl_multi_perform manpage - break - - num_q, _, err_list = self.curl_multi.info_read() - if num_q != 0: - logger.warning("CurlMulti.info_read() has %s remaining messages", num_q) - - if err_list: - err_obj = err_list[0] - - # For anything but the connection being closed, raise - if err_obj[1] != pycurl.E_PARTIAL_FILE: - raise OsbsNetworkException(self.url, err_obj[2], err_obj[1]) - - self.finished = (num_handles == 0) - - def _select(self): - sel = self.curl_multi.select(SELECT_TIMEOUT) - if sel == -1: - raise OsbsException("CurlMulti.select() timed out") - elif sel == 0: - # sel==0 means curl_multi_fdset returned -1 - # manual page suggests sleeping >100ms when this happens:( - time.sleep(0.1) - - def _any_data_received(self): - return self.response_buffer.tell() != 0 + args['stream'] = True + + args['headers'] = headers + self.req = requests.request(method, url, **args) + + self.headers = self.req.headers + self.status_code = self.req.status_code def _get_received_data(self): - result = self.response_buffer.getvalue() - self.response_buffer.truncate(0) - self.response_buffer.seek(0) - return self.response_decoder.decode(result, final=self.finished) + return self.req.content def iter_chunks(self): - while True: - self._perform() - if self._any_data_received(): - yield self._get_received_data() - if self.finished: - break - self._select() - - logger.debug("end of the stream") - self.close() + return self.req.iter_content(None) def iter_lines(self): - chunks = self.iter_chunks() - return self._split_lines_from_chunks(chunks) - - @staticmethod - def _split_lines_from_chunks(chunks): - # same behaviour as requests' Response.iter_lines(...) - - pending = None - for chunk in chunks: - - if pending is not None: - chunk = pending + chunk - lines = chunk.splitlines() - - if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]: - pending = lines.pop() - else: - pending = None - - for line in lines: - yield line - - if pending is not None: - yield pending - - @staticmethod - def _curl_debug(debug_type, debug_msg): - try: - logger_name = { - pycurl.INFOTYPE_TEXT: 'curl', - pycurl.INFOTYPE_HEADER_IN: 'in', - pycurl.INFOTYPE_HEADER_OUT: 'out' - }[debug_type] - except KeyError: - return - - curl_logger = logging.getLogger(__name__ + '.' + logger_name) - for line in debug_msg.splitlines(): - if not line: - continue - curl_logger.debug(line) - - @property - def encoding(self): - encoding = None - if 'content-type' in self.headers: - content_type = self.headers['content-type'].lower() - match = re.search(r'charset=(\S+)', content_type) - if match: - encoding = match.group(1) - if encoding is None: - encoding = 'utf-8' # assume utf-8 - - return encoding + return self.req.iter_lines() def close(self): if not self.closed: logger.debug("cleaning up") - self.curl_multi.remove_handle(self.c) - self.c.close() - self.curl_multi.close() + del self.req self.closed = True def __del__(self): @@ -376,7 +168,9 @@ def __init__(self, status_code, headers, content): self.content = content def json(self, check=True): - if check and self.status_code not in (0, httplib.OK, httplib.CREATED): + if check and self.status_code not in (0, requests.codes.OK, requests.codes.CREATED): raise OsbsResponseException(self.content, self.status_code) + if isinstance(self.content, bytes): + self.content = self.content.decode('utf-8') return json.loads(self.content) From b626346936a0cd12cdaed028152311f70afcdc41 Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Wed, 21 Sep 2016 14:13:25 +0000 Subject: [PATCH 2/3] Fix tests for porting of http client to requests Signed-off-by: Patrick Uiterwijk --- tests/test_http.py | 58 +----------------------------------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/tests/test_http.py b/tests/test_http.py index 674f9eb..30d2f14 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -12,7 +12,7 @@ import pytest import osbs.http as osbs_http -from osbs.http import parse_headers, HttpSession, HttpStream +from osbs.http import HttpSession, HttpStream from osbs.exceptions import OsbsNetworkException from tests.fake_api import Connection, ResponseMapping @@ -34,22 +34,6 @@ def has_connection(): return False -class TestParseHeaders(object): - def test_parse_headers(self): - conn = Connection("0.5.4") - rm = ResponseMapping("0.5.4", lookup=conn.get_definition_for) - - key, value = conn.get_definition_for("/oauth/authorize") - file_name = value["get"]["file"] - raw_headers = rm.get_response_content(file_name) - - headers = parse_headers(raw_headers) - - assert headers is not None - assert len(headers.items()) > 0 - assert headers["location"] - - @pytest.mark.skipif(not has_connection(), reason="requires internet connection") class TestHttpSession(object): @@ -141,43 +125,3 @@ def test_raise(self, s): with s.get("http://httpbin.org/stream/3", stream=True) as s: raise RuntimeError("hi") assert s.closed - - -class TestHttpStream(object): - @pytest.mark.parametrize('chunks,expected_content', [ - ([b'foo', b'', b'bar', b'baz'], u'foobarbaz'), - ([b'a', b'b', b'\xc4', b'\x8d', b'x'], u'ab\u010dx'), - ([b'\xe2', b'\x8a', b'\x86'], u'\u2286'), - ([b'\xe2\x8a', b'\x86'], u'\u2286'), - ([b'\xe2', b'\x8a\x86'], u'\u2286'), - ([b'aaaa', b'\xe2\x8a', b'\x86'], u'aaaa\u2286'), - ([b'aaaa\xe2\x8a', b'\x86'], u'aaaa\u2286'), - ([b'\xe2\x8a', b'\x86ffff'], u'\u2286ffff'), - ]) - def test_http_multibyte_decoding(self, chunks, expected_content): - class Whatever(object): - def __getattr__(self, name): - return self - - def __call__(self, *args, **kwargs): - return self - flexmock(pycurl).should_receive('Curl').and_return(Whatever()) - flexmock(pycurl).should_receive('CurlMulti').and_return(Whatever()) - (flexmock(osbs_http).should_receive('parse_headers') - .and_return({'content-type': 'application/json; charset=utf-8'})) - flexmock(HttpStream, _select=lambda: None) - - def mock_perform(self): - if chunks: - self.response_buffer.write(chunks.pop(0)) - else: - self.finished = True - - try: - orig_perform = HttpStream._perform - HttpStream._perform = mock_perform - - r = HttpSession(verbose=True).get('http://') - assert r.content == expected_content - finally: - HttpStream._perform = orig_perform From cdd2061d6397e159bb32413b39e538be7685b505 Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Wed, 21 Sep 2016 14:27:16 +0000 Subject: [PATCH 3/3] Update spec file to include python-requests Signed-off-by: Patrick Uiterwijk --- osbs-client.spec | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/osbs-client.spec b/osbs-client.spec index 298aadf..c0b3cd5 100644 --- a/osbs-client.spec +++ b/osbs-client.spec @@ -60,7 +60,8 @@ BuildRequires: python-pytest-capturelog BuildRequires: python-flexmock BuildRequires: python-six BuildRequires: python-dockerfile-parse -BuildRequires: python-pycurl +BuildRequires: python-requests +BuildRequires: python-requests-kerberos %endif # with_check %if 0%{?with_python3} @@ -73,7 +74,8 @@ BuildRequires: python3-pytest-capturelog BuildRequires: python3-flexmock BuildRequires: python3-six BuildRequires: python3-dockerfile-parse -BuildRequires: python3-pycurl +BuildRequires: python3-requests +BuildRequires: python3-requests-kerberos %endif # with_check %endif # with_python3 @@ -91,7 +93,8 @@ Summary: Python 2 module for OpenShift Build Service Group: Development/Tools License: BSD Requires: python-dockerfile-parse -Requires: python-pycurl +Requires: python-requests +Requires: python-requests-kerberos Requires: python-setuptools Requires: krb5-workstation %if 0%{?rhel} && 0%{?rhel} <= 6 @@ -113,7 +116,8 @@ Summary: Python 3 module for OpenShift Build Service Group: Development/Tools License: BSD Requires: python3-dockerfile-parse -Requires: python3-pycurl +Requires: python3-requests +Requires: python3-requests-kerberos Requires: python3-dateutil Requires: python3-setuptools Requires: krb5-workstation