Diffstat (limited to '.venv/lib/python3.12/site-packages/urllib3/contrib')
9 files changed, 2178 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/__init__.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/__init__.py diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/__init__.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/__init__.py new file mode 100644 index 00000000..8a3c5beb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/__init__.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import urllib3.connection + +from ...connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connection import EmscriptenHTTPConnection, EmscriptenHTTPSConnection + + +def inject_into_urllib3() -> None: + # override connection classes to use emscripten specific classes + # n.b. mypy complains about the overriding of classes below + # if it isn't ignored + HTTPConnectionPool.ConnectionCls = EmscriptenHTTPConnection + HTTPSConnectionPool.ConnectionCls = EmscriptenHTTPSConnection + urllib3.connection.HTTPConnection = EmscriptenHTTPConnection # type: ignore[misc,assignment] + urllib3.connection.HTTPSConnection = EmscriptenHTTPSConnection # type: ignore[misc,assignment] diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/connection.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/connection.py new file mode 100644 index 00000000..41bfd279 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/connection.py @@ -0,0 +1,255 @@ +from __future__ import annotations + +import os +import typing + +# use http.client.HTTPException for consistency with non-emscripten +from http.client import HTTPException as HTTPException # noqa: F401 +from http.client import ResponseNotReady + +from ..._base_connection import _TYPE_BODY +from ...connection import HTTPConnection, ProxyConfig, port_by_scheme +from ...exceptions import TimeoutError +from ...response import BaseHTTPResponse +from ...util.connection import _TYPE_SOCKET_OPTIONS +from ...util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT +from ...util.url import Url +from .fetch import _RequestError, _TimeoutError, send_request, send_streaming_request +from .request import EmscriptenRequest +from .response import EmscriptenHttpResponseWrapper, EmscriptenResponse + +if typing.TYPE_CHECKING: + from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection + + +class EmscriptenHTTPConnection: + default_port: typing.ClassVar[int] = port_by_scheme["http"] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + timeout: None | (float) + + host: str + port: int + blocksize: int + source_address: tuple[str, int] | None + socket_options: _TYPE_SOCKET_OPTIONS | None + + proxy: Url | None + proxy_config: ProxyConfig | None + + is_verified: bool = False + proxy_is_verified: bool | None = None + + _response: EmscriptenResponse | None + + def __init__( + self, + host: str, + port: int = 0, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 8192, + socket_options: _TYPE_SOCKET_OPTIONS | None = None, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + ) -> None: + self.host = host + self.port = port + self.timeout = timeout if isinstance(timeout, float) else 0.0 + self.scheme = "http" + self._closed = True + self._response = None + # ignore these things because we don't + 
# have control over that stuff + self.proxy = None + self.proxy_config = None + self.blocksize = blocksize + self.source_address = None + self.socket_options = None + self.is_verified = False + + def set_tunnel( + self, + host: str, + port: int | None = 0, + headers: typing.Mapping[str, str] | None = None, + scheme: str = "http", + ) -> None: + pass + + def connect(self) -> None: + pass + + def request( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + # We know *at least* botocore is depending on the order of the + # first 3 parameters so to be safe we only mark the later ones + # as keyword-only to ensure we have space to extend. + *, + chunked: bool = False, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> None: + self._closed = False + if url.startswith("/"): + # no scheme / host / port included, make a full url + url = f"{self.scheme}://{self.host}:{self.port}" + url + request = EmscriptenRequest( + url=url, + method=method, + timeout=self.timeout if self.timeout else 0, + decode_content=decode_content, + ) + request.set_body(body) + if headers: + for k, v in headers.items(): + request.set_header(k, v) + self._response = None + try: + if not preload_content: + self._response = send_streaming_request(request) + if self._response is None: + self._response = send_request(request) + except _TimeoutError as e: + raise TimeoutError(e.message) from e + except _RequestError as e: + raise HTTPException(e.message) from e + + def getresponse(self) -> BaseHTTPResponse: + if self._response is not None: + return EmscriptenHttpResponseWrapper( + internal_response=self._response, + url=self._response.request.url, + connection=self, + ) + else: + raise ResponseNotReady() + + def close(self) -> None: + self._closed = True + self._response = None + + @property + def is_closed(self) -> bool: + """Whether the connection either is brand new or has been previously closed. + If this property is True then both ``is_connected`` and ``has_connected_to_proxy`` + properties must be False. + """ + return self._closed + + @property + def is_connected(self) -> bool: + """Whether the connection is actively connected to any origin (proxy or target)""" + return True + + @property + def has_connected_to_proxy(self) -> bool: + """Whether the connection has successfully connected to its proxy. + This returns False if no proxy is in use. Used to determine whether + errors are coming from the proxy layer or from tunnelling to the target origin. 
+ """ + return False + + +class EmscriptenHTTPSConnection(EmscriptenHTTPConnection): + default_port = port_by_scheme["https"] + # all this is basically ignored, as browser handles https + cert_reqs: int | str | None = None + ca_certs: str | None = None + ca_cert_dir: str | None = None + ca_cert_data: None | str | bytes = None + cert_file: str | None + key_file: str | None + key_password: str | None + ssl_context: typing.Any | None + ssl_version: int | str | None = None + ssl_minimum_version: int | None = None + ssl_maximum_version: int | None = None + assert_hostname: None | str | typing.Literal[False] + assert_fingerprint: str | None = None + + def __init__( + self, + host: str, + port: int = 0, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 16384, + socket_options: ( + None | _TYPE_SOCKET_OPTIONS + ) = HTTPConnection.default_socket_options, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + cert_reqs: int | str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + server_hostname: str | None = None, + ssl_context: typing.Any | None = None, + ca_certs: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ssl_minimum_version: int | None = None, + ssl_maximum_version: int | None = None, + ssl_version: int | str | None = None, # Deprecated + cert_file: str | None = None, + key_file: str | None = None, + key_password: str | None = None, + ) -> None: + super().__init__( + host, + port=port, + timeout=timeout, + source_address=source_address, + blocksize=blocksize, + socket_options=socket_options, + proxy=proxy, + proxy_config=proxy_config, + ) + self.scheme = "https" + + self.key_file = key_file + self.cert_file = cert_file + self.key_password = key_password + self.ssl_context = ssl_context + self.server_hostname = server_hostname + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + self.ca_cert_data = ca_cert_data + + self.cert_reqs = None + + # The browser will automatically verify all requests. + # We have no control over that setting. 
+ self.is_verified = True + + def set_cert( + self, + key_file: str | None = None, + cert_file: str | None = None, + cert_reqs: int | str | None = None, + key_password: str | None = None, + ca_certs: str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ) -> None: + pass + + +# verify that this class implements BaseHTTP(s) connection correctly +if typing.TYPE_CHECKING: + _supports_http_protocol: BaseHTTPConnection = EmscriptenHTTPConnection("", 0) + _supports_https_protocol: BaseHTTPSConnection = EmscriptenHTTPSConnection("", 0) diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/emscripten_fetch_worker.js b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/emscripten_fetch_worker.js new file mode 100644 index 00000000..243b8622 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/emscripten_fetch_worker.js @@ -0,0 +1,110 @@ +let Status = { + SUCCESS_HEADER: -1, + SUCCESS_EOF: -2, + ERROR_TIMEOUT: -3, + ERROR_EXCEPTION: -4, +}; + +let connections = {}; +let nextConnectionID = 1; +const encoder = new TextEncoder(); + +self.addEventListener("message", async function (event) { + if (event.data.close) { + let connectionID = event.data.close; + delete connections[connectionID]; + return; + } else if (event.data.getMore) { + let connectionID = event.data.getMore; + let { curOffset, value, reader, intBuffer, byteBuffer } = + connections[connectionID]; + // if we still have some in buffer, then just send it back straight away + if (!value || curOffset >= value.length) { + // read another buffer if required + try { + let readResponse = await reader.read(); + + if (readResponse.done) { + // read everything - clear connection and return + delete connections[connectionID]; + Atomics.store(intBuffer, 0, Status.SUCCESS_EOF); + Atomics.notify(intBuffer, 0); + // finished reading successfully + // return from event handler + return; + } + curOffset = 0; + connections[connectionID].value = readResponse.value; + value = readResponse.value; + } catch (error) { + console.log("Request exception:", error); + let errorBytes = encoder.encode(error.message); + let written = errorBytes.length; + byteBuffer.set(errorBytes); + intBuffer[1] = written; + Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION); + Atomics.notify(intBuffer, 0); + } + } + + // send as much buffer as we can + let curLen = value.length - curOffset; + if (curLen > byteBuffer.length) { + curLen = byteBuffer.length; + } + byteBuffer.set(value.subarray(curOffset, curOffset + curLen), 0); + + Atomics.store(intBuffer, 0, curLen); // store current length in bytes + Atomics.notify(intBuffer, 0); + curOffset += curLen; + connections[connectionID].curOffset = curOffset; + + return; + } else { + // start fetch + let connectionID = nextConnectionID; + nextConnectionID += 1; + const intBuffer = new Int32Array(event.data.buffer); + const byteBuffer = new Uint8Array(event.data.buffer, 8); + try { + const response = await fetch(event.data.url, event.data.fetchParams); + // return the headers first via textencoder + var headers = []; + for (const pair of response.headers.entries()) { + headers.push([pair[0], pair[1]]); + } + let headerObj = { + headers: headers, + status: response.status, + connectionID, + }; + const headerText = JSON.stringify(headerObj); + let headerBytes = encoder.encode(headerText); + let written = headerBytes.length; + 
byteBuffer.set(headerBytes); + intBuffer[1] = written; + // make a connection + connections[connectionID] = { + reader: response.body.getReader(), + intBuffer: intBuffer, + byteBuffer: byteBuffer, + value: undefined, + curOffset: 0, + }; + // set header ready + Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER); + Atomics.notify(intBuffer, 0); + // all fetching after this goes through a new postmessage call with getMore + // this allows for parallel requests + } catch (error) { + console.log("Request exception:", error); + let errorBytes = encoder.encode(error.message); + let written = errorBytes.length; + byteBuffer.set(errorBytes); + intBuffer[1] = written; + Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION); + Atomics.notify(intBuffer, 0); + } + } +}); +self.postMessage({ inited: true }); diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/fetch.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/fetch.py new file mode 100644 index 00000000..a514306e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/fetch.py @@ -0,0 +1,708 @@ +""" +Support for streaming http requests in emscripten. + +A few caveats - + +If your browser (or Node.js) has WebAssembly JavaScript Promise Integration enabled +https://github.com/WebAssembly/js-promise-integration/blob/main/proposals/js-promise-integration/Overview.md +*and* you launch pyodide using `pyodide.runPythonAsync`, this will fetch data using the +JavaScript asynchronous fetch api (wrapped via `pyodide.ffi.call_sync`). In this case +timeouts and streaming should just work. + +Otherwise, it uses a combination of XMLHttpRequest and a web-worker for streaming. + +This approach has several caveats: + +Firstly, you can't do streaming http in the main UI thread, because atomics.wait isn't allowed. +Streaming only works if you're running pyodide in a web worker. + +Secondly, this uses an extra web worker and SharedArrayBuffer to do the asynchronous fetch +operation, so it requires that you have crossOriginIsolation enabled, by serving over https +(or from localhost) with the two headers below set: + + Cross-Origin-Opener-Policy: same-origin + Cross-Origin-Embedder-Policy: require-corp + +You can tell if cross origin isolation is successfully enabled by looking at the global crossOriginIsolated variable in +JavaScript console. If it isn't, streaming requests will fallback to XMLHttpRequest, i.e. getting the whole +request into a buffer and then returning it. it shows a warning in the JavaScript console in this case. + +Finally, the webworker which does the streaming fetch is created on initial import, but will only be started once +control is returned to javascript. Call `await wait_for_streaming_ready()` to wait for streaming fetch. + +NB: in this code, there are a lot of JavaScript objects. They are named js_* +to make it clear what type of object they are. +""" + +from __future__ import annotations + +import io +import json +from email.parser import Parser +from importlib.resources import files +from typing import TYPE_CHECKING, Any + +import js # type: ignore[import-not-found] +from pyodide.ffi import ( # type: ignore[import-not-found] + JsArray, + JsException, + JsProxy, + to_js, +) + +if TYPE_CHECKING: + from typing_extensions import Buffer + +from .request import EmscriptenRequest +from .response import EmscriptenResponse + +""" +There are some headers that trigger unintended CORS preflight requests. 
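+Setting a custom user-agent, for example, can force such a preflight, which is
+why it is dropped from outgoing requests below (see HEADERS_TO_IGNORE).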
+See also https://github.com/koenvo/pyodide-http/issues/22 +""" +HEADERS_TO_IGNORE = ("user-agent",) + +SUCCESS_HEADER = -1 +SUCCESS_EOF = -2 +ERROR_TIMEOUT = -3 +ERROR_EXCEPTION = -4 + +_STREAMING_WORKER_CODE = ( + files(__package__) + .joinpath("emscripten_fetch_worker.js") + .read_text(encoding="utf-8") +) + + +class _RequestError(Exception): + def __init__( + self, + message: str | None = None, + *, + request: EmscriptenRequest | None = None, + response: EmscriptenResponse | None = None, + ): + self.request = request + self.response = response + self.message = message + super().__init__(self.message) + + +class _StreamingError(_RequestError): + pass + + +class _TimeoutError(_RequestError): + pass + + +def _obj_from_dict(dict_val: dict[str, Any]) -> JsProxy: + return to_js(dict_val, dict_converter=js.Object.fromEntries) + + +class _ReadStream(io.RawIOBase): + def __init__( + self, + int_buffer: JsArray, + byte_buffer: JsArray, + timeout: float, + worker: JsProxy, + connection_id: int, + request: EmscriptenRequest, + ): + self.int_buffer = int_buffer + self.byte_buffer = byte_buffer + self.read_pos = 0 + self.read_len = 0 + self.connection_id = connection_id + self.worker = worker + self.timeout = int(1000 * timeout) if timeout > 0 else None + self.is_live = True + self._is_closed = False + self.request: EmscriptenRequest | None = request + + def __del__(self) -> None: + self.close() + + # this is compatible with _base_connection + def is_closed(self) -> bool: + return self._is_closed + + # for compatibility with RawIOBase + @property + def closed(self) -> bool: + return self.is_closed() + + def close(self) -> None: + if self.is_closed(): + return + self.read_len = 0 + self.read_pos = 0 + self.int_buffer = None + self.byte_buffer = None + self._is_closed = True + self.request = None + if self.is_live: + self.worker.postMessage(_obj_from_dict({"close": self.connection_id})) + self.is_live = False + super().close() + + def readable(self) -> bool: + return True + + def writable(self) -> bool: + return False + + def seekable(self) -> bool: + return False + + def readinto(self, byte_obj: Buffer) -> int: + if not self.int_buffer: + raise _StreamingError( + "No buffer for stream in _ReadStream.readinto", + request=self.request, + response=None, + ) + if self.read_len == 0: + # wait for the worker to send something + js.Atomics.store(self.int_buffer, 0, ERROR_TIMEOUT) + self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id})) + if ( + js.Atomics.wait(self.int_buffer, 0, ERROR_TIMEOUT, self.timeout) + == "timed-out" + ): + raise _TimeoutError + data_len = self.int_buffer[0] + if data_len > 0: + self.read_len = data_len + self.read_pos = 0 + elif data_len == ERROR_EXCEPTION: + string_len = self.int_buffer[1] + # decode the error string + js_decoder = js.TextDecoder.new() + json_str = js_decoder.decode(self.byte_buffer.slice(0, string_len)) + raise _StreamingError( + f"Exception thrown in fetch: {json_str}", + request=self.request, + response=None, + ) + else: + # EOF, free the buffers and return zero + # and free the request + self.is_live = False + self.close() + return 0 + # copy from int32array to python bytes + ret_length = min(self.read_len, len(memoryview(byte_obj))) + subarray = self.byte_buffer.subarray( + self.read_pos, self.read_pos + ret_length + ).to_py() + memoryview(byte_obj)[0:ret_length] = subarray + self.read_len -= ret_length + self.read_pos += ret_length + return ret_length + + +class _StreamingFetcher: + def __init__(self) -> None: + # make web-worker and data 
buffer on startup + self.streaming_ready = False + + js_data_blob = js.Blob.new( + to_js([_STREAMING_WORKER_CODE], create_pyproxies=False), + _obj_from_dict({"type": "application/javascript"}), + ) + + def promise_resolver(js_resolve_fn: JsProxy, js_reject_fn: JsProxy) -> None: + def onMsg(e: JsProxy) -> None: + self.streaming_ready = True + js_resolve_fn(e) + + def onErr(e: JsProxy) -> None: + js_reject_fn(e) # Defensive: never happens in ci + + self.js_worker.onmessage = onMsg + self.js_worker.onerror = onErr + + js_data_url = js.URL.createObjectURL(js_data_blob) + self.js_worker = js.globalThis.Worker.new(js_data_url) + self.js_worker_ready_promise = js.globalThis.Promise.new(promise_resolver) + + def send(self, request: EmscriptenRequest) -> EmscriptenResponse: + headers = { + k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE + } + + body = request.body + fetch_data = {"headers": headers, "body": to_js(body), "method": request.method} + # start the request off in the worker + timeout = int(1000 * request.timeout) if request.timeout > 0 else None + js_shared_buffer = js.SharedArrayBuffer.new(1048576) + js_int_buffer = js.Int32Array.new(js_shared_buffer) + js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8) + + js.Atomics.store(js_int_buffer, 0, ERROR_TIMEOUT) + js.Atomics.notify(js_int_buffer, 0) + js_absolute_url = js.URL.new(request.url, js.location).href + self.js_worker.postMessage( + _obj_from_dict( + { + "buffer": js_shared_buffer, + "url": js_absolute_url, + "fetchParams": fetch_data, + } + ) + ) + # wait for the worker to send something + js.Atomics.wait(js_int_buffer, 0, ERROR_TIMEOUT, timeout) + if js_int_buffer[0] == ERROR_TIMEOUT: + raise _TimeoutError( + "Timeout connecting to streaming request", + request=request, + response=None, + ) + elif js_int_buffer[0] == SUCCESS_HEADER: + # got response + # header length is in second int of intBuffer + string_len = js_int_buffer[1] + # decode the rest to a JSON string + js_decoder = js.TextDecoder.new() + # this does a copy (the slice) because decode can't work on shared array + # for some silly reason + json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len)) + # get it as an object + response_obj = json.loads(json_str) + return EmscriptenResponse( + request=request, + status_code=response_obj["status"], + headers=response_obj["headers"], + body=_ReadStream( + js_int_buffer, + js_byte_buffer, + request.timeout, + self.js_worker, + response_obj["connectionID"], + request, + ), + ) + elif js_int_buffer[0] == ERROR_EXCEPTION: + string_len = js_int_buffer[1] + # decode the error string + js_decoder = js.TextDecoder.new() + json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len)) + raise _StreamingError( + f"Exception thrown in fetch: {json_str}", request=request, response=None + ) + else: + raise _StreamingError( + f"Unknown status from worker in fetch: {js_int_buffer[0]}", + request=request, + response=None, + ) + + +class _JSPIReadStream(io.RawIOBase): + """ + A read stream that uses pyodide.ffi.run_sync to read from a JavaScript fetch + response. This requires support for WebAssembly JavaScript Promise Integration + in the containing browser, and for pyodide to be launched via runPythonAsync. 
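+ Each read suspends WebAssembly via pyodide.ffi.run_sync until the JavaScript
+ read promise resolves, so reads appear synchronous from the Python side.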
+ + :param js_read_stream: + The JavaScript stream reader + + :param timeout: + Timeout in seconds + + :param request: + The request we're handling + + :param response: + The response this stream relates to + + :param js_abort_controller: + A JavaScript AbortController object, used for timeouts + """ + + def __init__( + self, + js_read_stream: Any, + timeout: float, + request: EmscriptenRequest, + response: EmscriptenResponse, + js_abort_controller: Any, # JavaScript AbortController for timeouts + ): + self.js_read_stream = js_read_stream + self.timeout = timeout + self._is_closed = False + self._is_done = False + self.request: EmscriptenRequest | None = request + self.response: EmscriptenResponse | None = response + self.current_buffer = None + self.current_buffer_pos = 0 + self.js_abort_controller = js_abort_controller + + def __del__(self) -> None: + self.close() + + # this is compatible with _base_connection + def is_closed(self) -> bool: + return self._is_closed + + # for compatibility with RawIOBase + @property + def closed(self) -> bool: + return self.is_closed() + + def close(self) -> None: + if self.is_closed(): + return + self.read_len = 0 + self.read_pos = 0 + self.js_read_stream.cancel() + self.js_read_stream = None + self._is_closed = True + self._is_done = True + self.request = None + self.response = None + super().close() + + def readable(self) -> bool: + return True + + def writable(self) -> bool: + return False + + def seekable(self) -> bool: + return False + + def _get_next_buffer(self) -> bool: + result_js = _run_sync_with_timeout( + self.js_read_stream.read(), + self.timeout, + self.js_abort_controller, + request=self.request, + response=self.response, + ) + if result_js.done: + self._is_done = True + return False + else: + self.current_buffer = result_js.value.to_py() + self.current_buffer_pos = 0 + return True + + def readinto(self, byte_obj: Buffer) -> int: + if self.current_buffer is None: + if not self._get_next_buffer() or self.current_buffer is None: + self.close() + return 0 + ret_length = min( + len(byte_obj), len(self.current_buffer) - self.current_buffer_pos + ) + byte_obj[0:ret_length] = self.current_buffer[ + self.current_buffer_pos : self.current_buffer_pos + ret_length + ] + self.current_buffer_pos += ret_length + if self.current_buffer_pos == len(self.current_buffer): + self.current_buffer = None + return ret_length + + +# check if we are in a worker or not +def is_in_browser_main_thread() -> bool: + return hasattr(js, "window") and hasattr(js, "self") and js.self == js.window + + +def is_cross_origin_isolated() -> bool: + return hasattr(js, "crossOriginIsolated") and js.crossOriginIsolated + + +def is_in_node() -> bool: + return ( + hasattr(js, "process") + and hasattr(js.process, "release") + and hasattr(js.process.release, "name") + and js.process.release.name == "node" + ) + + +def is_worker_available() -> bool: + return hasattr(js, "Worker") and hasattr(js, "Blob") + + +_fetcher: _StreamingFetcher | None = None + +if is_worker_available() and ( + (is_cross_origin_isolated() and not is_in_browser_main_thread()) + and (not is_in_node()) +): + _fetcher = _StreamingFetcher() +else: + _fetcher = None + + +NODE_JSPI_ERROR = ( + "urllib3 only works in Node.js with pyodide.runPythonAsync" + " and requires the flag --experimental-wasm-stack-switching in " + " versions of node <24." 
+) + + +def send_streaming_request(request: EmscriptenRequest) -> EmscriptenResponse | None: + if has_jspi(): + return send_jspi_request(request, True) + elif is_in_node(): + raise _RequestError( + message=NODE_JSPI_ERROR, + request=request, + response=None, + ) + + if _fetcher and streaming_ready(): + return _fetcher.send(request) + else: + _show_streaming_warning() + return None + + +_SHOWN_TIMEOUT_WARNING = False + + +def _show_timeout_warning() -> None: + global _SHOWN_TIMEOUT_WARNING + if not _SHOWN_TIMEOUT_WARNING: + _SHOWN_TIMEOUT_WARNING = True + message = "Warning: Timeout is not available on main browser thread" + js.console.warn(message) + + +_SHOWN_STREAMING_WARNING = False + + +def _show_streaming_warning() -> None: + global _SHOWN_STREAMING_WARNING + if not _SHOWN_STREAMING_WARNING: + _SHOWN_STREAMING_WARNING = True + message = "Can't stream HTTP requests because: \n" + if not is_cross_origin_isolated(): + message += " Page is not cross-origin isolated\n" + if is_in_browser_main_thread(): + message += " Python is running in main browser thread\n" + if not is_worker_available(): + message += " Worker or Blob classes are not available in this environment." # Defensive: this is always False in browsers that we test in + if streaming_ready() is False: + message += """ Streaming fetch worker isn't ready. If you want to be sure that streaming fetch +is working, you need to call: 'await urllib3.contrib.emscripten.fetch.wait_for_streaming_ready()`""" + from js import console + + console.warn(message) + + +def send_request(request: EmscriptenRequest) -> EmscriptenResponse: + if has_jspi(): + return send_jspi_request(request, False) + elif is_in_node(): + raise _RequestError( + message=NODE_JSPI_ERROR, + request=request, + response=None, + ) + try: + js_xhr = js.XMLHttpRequest.new() + + if not is_in_browser_main_thread(): + js_xhr.responseType = "arraybuffer" + if request.timeout: + js_xhr.timeout = int(request.timeout * 1000) + else: + js_xhr.overrideMimeType("text/plain; charset=ISO-8859-15") + if request.timeout: + # timeout isn't available on the main thread - show a warning in console + # if it is set + _show_timeout_warning() + + js_xhr.open(request.method, request.url, False) + for name, value in request.headers.items(): + if name.lower() not in HEADERS_TO_IGNORE: + js_xhr.setRequestHeader(name, value) + + js_xhr.send(to_js(request.body)) + + headers = dict(Parser().parsestr(js_xhr.getAllResponseHeaders())) + + if not is_in_browser_main_thread(): + body = js_xhr.response.to_py().tobytes() + else: + body = js_xhr.response.encode("ISO-8859-15") + return EmscriptenResponse( + status_code=js_xhr.status, headers=headers, body=body, request=request + ) + except JsException as err: + if err.name == "TimeoutError": + raise _TimeoutError(err.message, request=request) + elif err.name == "NetworkError": + raise _RequestError(err.message, request=request) + else: + # general http error + raise _RequestError(err.message, request=request) + + +def send_jspi_request( + request: EmscriptenRequest, streaming: bool +) -> EmscriptenResponse: + """ + Send a request using WebAssembly JavaScript Promise Integration + to wrap the asynchronous JavaScript fetch api (experimental). 
+ + :param request: + Request to send + + :param streaming: + Whether to stream the response + + :return: The response object + :rtype: EmscriptenResponse + """ + timeout = request.timeout + js_abort_controller = js.AbortController.new() + headers = {k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE} + req_body = request.body + fetch_data = { + "headers": headers, + "body": to_js(req_body), + "method": request.method, + "signal": js_abort_controller.signal, + } + # Call JavaScript fetch (async api, returns a promise) + fetcher_promise_js = js.fetch(request.url, _obj_from_dict(fetch_data)) + # Now suspend WebAssembly until we resolve that promise + # or time out. + response_js = _run_sync_with_timeout( + fetcher_promise_js, + timeout, + js_abort_controller, + request=request, + response=None, + ) + headers = {} + header_iter = response_js.headers.entries() + while True: + iter_value_js = header_iter.next() + if getattr(iter_value_js, "done", False): + break + else: + headers[str(iter_value_js.value[0])] = str(iter_value_js.value[1]) + status_code = response_js.status + body: bytes | io.RawIOBase = b"" + + response = EmscriptenResponse( + status_code=status_code, headers=headers, body=b"", request=request + ) + if streaming: + # get via inputstream + if response_js.body is not None: + # get a reader from the fetch response + body_stream_js = response_js.body.getReader() + body = _JSPIReadStream( + body_stream_js, timeout, request, response, js_abort_controller + ) + else: + # get directly via arraybuffer + # n.b. this is another async JavaScript call. + body = _run_sync_with_timeout( + response_js.arrayBuffer(), + timeout, + js_abort_controller, + request=request, + response=response, + ).to_py() + response.body = body + return response + + +def _run_sync_with_timeout( + promise: Any, + timeout: float, + js_abort_controller: Any, + request: EmscriptenRequest | None, + response: EmscriptenResponse | None, +) -> Any: + """ + Await a JavaScript promise synchronously with a timeout which is implemented + via the AbortController + + :param promise: + Javascript promise to await + + :param timeout: + Timeout in seconds + + :param js_abort_controller: + A JavaScript AbortController object, used on timeout + + :param request: + The request being handled + + :param response: + The response being handled (if it exists yet) + + :raises _TimeoutError: If the request times out + :raises _RequestError: If the request raises a JavaScript exception + + :return: The result of awaiting the promise. + """ + timer_id = None + if timeout > 0: + timer_id = js.setTimeout( + js_abort_controller.abort.bind(js_abort_controller), int(timeout * 1000) + ) + try: + from pyodide.ffi import run_sync + + # run_sync here uses WebAssembly JavaScript Promise Integration to + # suspend python until the JavaScript promise resolves. + return run_sync(promise) + except JsException as err: + if err.name == "AbortError": + raise _TimeoutError( + message="Request timed out", request=request, response=response + ) + else: + raise _RequestError(message=err.message, request=request, response=response) + finally: + if timer_id is not None: + js.clearTimeout(timer_id) + + +def has_jspi() -> bool: + """ + Return true if jspi can be used. + + This requires both browser support and also WebAssembly + to be in the correct state - i.e. that the javascript + call into python was async not sync. + + :return: True if jspi can be used. 
+ :rtype: bool + """ + try: + from pyodide.ffi import can_run_sync, run_sync # noqa: F401 + + return bool(can_run_sync()) + except ImportError: + return False + + +def streaming_ready() -> bool | None: + if _fetcher: + return _fetcher.streaming_ready + else: + return None # no fetcher, return None to signify that + + +async def wait_for_streaming_ready() -> bool: + if _fetcher: + await _fetcher.js_worker_ready_promise + return True + else: + return False diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/request.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/request.py new file mode 100644 index 00000000..e692e692 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/request.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from ..._base_connection import _TYPE_BODY + + +@dataclass +class EmscriptenRequest: + method: str + url: str + params: dict[str, str] | None = None + body: _TYPE_BODY | None = None + headers: dict[str, str] = field(default_factory=dict) + timeout: float = 0 + decode_content: bool = True + + def set_header(self, name: str, value: str) -> None: + self.headers[name.capitalize()] = value + + def set_body(self, body: _TYPE_BODY | None) -> None: + self.body = body diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/response.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/response.py new file mode 100644 index 00000000..b32b4023 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/emscripten/response.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +import json as _json +import logging +import typing +from contextlib import contextmanager +from dataclasses import dataclass +from http.client import HTTPException as HTTPException +from io import BytesIO, IOBase + +from ...exceptions import InvalidHeader, TimeoutError +from ...response import BaseHTTPResponse +from ...util.retry import Retry +from .request import EmscriptenRequest + +if typing.TYPE_CHECKING: + from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection + +log = logging.getLogger(__name__) + + +@dataclass +class EmscriptenResponse: + status_code: int + headers: dict[str, str] + body: IOBase | bytes + request: EmscriptenRequest + + +class EmscriptenHttpResponseWrapper(BaseHTTPResponse): + def __init__( + self, + internal_response: EmscriptenResponse, + url: str | None = None, + connection: BaseHTTPConnection | BaseHTTPSConnection | None = None, + ): + self._pool = None # set by pool class + self._body = None + self._response = internal_response + self._url = url + self._connection = connection + self._closed = False + super().__init__( + headers=internal_response.headers, + status=internal_response.status_code, + request_url=url, + version=0, + version_string="HTTP/?", + reason="", + decode_content=True, + ) + self.length_remaining = self._init_length(self._response.request.method) + self.length_is_certain = False + + @property + def url(self) -> str | None: + return self._url + + @url.setter + def url(self, url: str | None) -> None: + self._url = url + + @property + def connection(self) -> BaseHTTPConnection | BaseHTTPSConnection | None: + return self._connection + + @property + def retries(self) -> Retry | None: + return self._retries + + @retries.setter + def retries(self, retries: Retry | None) -> None: + # Override the request_url if retries has a redirect location. 
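+ # (No override is implemented here; the browser's fetch layer follows
+ # redirects itself, so redirect history is not applied at this level.)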
+ self._retries = retries + + def stream( + self, amt: int | None = 2**16, decode_content: bool | None = None + ) -> typing.Generator[bytes]: + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + while True: + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + else: + break + + def _init_length(self, request_method: str | None) -> int | None: + length: int | None + content_length: str | None = self.headers.get("content-length") + + if content_length is not None: + try: + # RFC 7230 section 3.3.2 specifies multiple content lengths can + # be sent in a single Content-Length header + # (e.g. Content-Length: 42, 42). This line ensures the values + # are all valid ints and that as long as the `set` length is 1, + # all values are the same. Otherwise, the header is invalid. + lengths = {int(val) for val in content_length.split(",")} + if len(lengths) > 1: + raise InvalidHeader( + "Content-Length contained multiple " + "unmatching values (%s)" % content_length + ) + length = lengths.pop() + except ValueError: + length = None + else: + if length < 0: + length = None + + else: # if content_length is None + length = None + + # Check for responses that shouldn't include a body + if ( + self.status in (204, 304) + or 100 <= self.status < 200 + or request_method == "HEAD" + ): + length = 0 + + return length + + def read( + self, + amt: int | None = None, + decode_content: bool | None = None, # ignored because browser decodes always + cache_content: bool = False, + ) -> bytes: + if ( + self._closed + or self._response is None + or (isinstance(self._response.body, IOBase) and self._response.body.closed) + ): + return b"" + + with self._error_catcher(): + # body has been preloaded as a string by XmlHttpRequest + if not isinstance(self._response.body, IOBase): + self.length_remaining = len(self._response.body) + self.length_is_certain = True + # wrap body in IOStream + self._response.body = BytesIO(self._response.body) + if amt is not None and amt >= 0: + # don't cache partial content + cache_content = False + data = self._response.body.read(amt) + if self.length_remaining is not None: + self.length_remaining = max(self.length_remaining - len(data), 0) + if (self.length_is_certain and self.length_remaining == 0) or len( + data + ) < amt: + # definitely finished reading, close response stream + self._response.body.close() + return typing.cast(bytes, data) + else: # read all we can (and cache it) + data = self._response.body.read() + if cache_content: + self._body = data + if self.length_remaining is not None: + self.length_remaining = max(self.length_remaining - len(data), 0) + if len(data) == 0 or ( + self.length_is_certain and self.length_remaining == 0 + ): + # definitely finished reading, close response stream + self._response.body.close() + return typing.cast(bytes, data) + + def read_chunked( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> typing.Generator[bytes]: + # chunked is handled by browser + while True: + bytes = self.read(amt, decode_content) + if not bytes: + break + 
yield bytes + + def release_conn(self) -> None: + if not self._pool or not self._connection: + return None + + self._pool._put_conn(self._connection) + self._connection = None + + def drain_conn(self) -> None: + self.close() + + @property + def data(self) -> bytes: + if self._body: + return self._body + else: + return self.read(cache_content=True) + + def json(self) -> typing.Any: + """ + Deserializes the body of the HTTP response as a Python object. + + The body of the HTTP response must be encoded using UTF-8, as per + `RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_. + + To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to + your custom decoder instead. + + If the body of the HTTP response is not decodable to UTF-8, a + `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a + valid JSON document, a `json.JSONDecodeError` will be raised. + + Read more :ref:`here <json_content>`. + + :returns: The body of the HTTP response as a Python object. + """ + data = self.data.decode("utf-8") + return _json.loads(data) + + def close(self) -> None: + if not self._closed: + if isinstance(self._response.body, IOBase): + self._response.body.close() + if self._connection: + self._connection.close() + self._connection = None + self._closed = True + + @contextmanager + def _error_catcher(self) -> typing.Generator[None]: + """ + Catch Emscripten specific exceptions thrown by fetch.py, + instead re-raising urllib3 variants, so that low-level exceptions + are not leaked in the high-level api. + + On exit, release the connection back to the pool. + """ + from .fetch import _RequestError, _TimeoutError # avoid circular import + + clean_exit = False + + try: + yield + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + except _TimeoutError as e: + raise TimeoutError(str(e)) + except _RequestError as e: + raise HTTPException(str(e)) + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + # The response may not be closed but we're not going to use it + # anymore so close it now + if ( + isinstance(self._response.body, IOBase) + and not self._response.body.closed + ): + self._response.body.close() + # release the connection back to the pool + self.release_conn() + else: + # If we have read everything from the response stream, + # return the connection back to the pool. + if ( + isinstance(self._response.body, IOBase) + and self._response.body.closed + ): + self.release_conn() diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/pyopenssl.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/pyopenssl.py new file mode 100644 index 00000000..ed654306 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/pyopenssl.py @@ -0,0 +1,554 @@ +""" +Module for using pyOpenSSL as a TLS backend. This module was relevant before +the standard library ``ssl`` module supported SNI, but now that we've dropped +support for Python 2.7 all relevant Python versions support SNI so +**this module is no longer recommended**. + +This needs the following packages installed: + +* `pyOpenSSL`_ (tested with 16.0.0) +* `cryptography`_ (minimum 1.3.4, from pyopenssl) +* `idna`_ (minimum 2.0) + +However, pyOpenSSL depends on cryptography, so while we use all three directly here we +end up having relatively few packages required. + +You can install them with the following command: + +.. 
code-block:: bash + + $ python -m pip install pyopenssl cryptography idna + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this: + +.. code-block:: python + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +.. _pyopenssl: https://www.pyopenssl.org +.. _cryptography: https://cryptography.io +.. _idna: https://github.com/kjd/idna +""" + +from __future__ import annotations + +import OpenSSL.SSL # type: ignore[import-untyped] +from cryptography import x509 + +try: + from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined] +except ImportError: + # UnsupportedExtension is gone in cryptography >= 2.1.0 + class UnsupportedExtension(Exception): # type: ignore[no-redef] + pass + + +import logging +import ssl +import typing +from io import BytesIO +from socket import socket as socket_cls +from socket import timeout + +from .. import util + +if typing.TYPE_CHECKING: + from OpenSSL.crypto import X509 # type: ignore[import-untyped] + + +__all__ = ["inject_into_urllib3", "extract_from_urllib3"] + +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions: dict[int, int] = { + util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined] + util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined] + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} + +if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"): + _openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD + +if hasattr(ssl, "PROTOCOL_TLSv1_2") and hasattr(OpenSSL.SSL, "TLSv1_2_METHOD"): + _openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD + + +_stdlib_to_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} +_openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()} + +# The SSLvX values are the most likely to be missing in the future +# but we check them all just to be sure. 
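+# getattr(..., 0) makes any option flag this OpenSSL build no longer defines
+# a harmless no-op (OR-ing in 0) instead of an AttributeError at import time.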
+_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr( + OpenSSL.SSL, "OP_NO_SSLv3", 0 +) +_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0) +_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0) +_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0) +_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0) + +_openssl_to_ssl_minimum_version: dict[int, int] = { + ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3, + ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3, + ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1, + ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1, + ssl.TLSVersion.TLSv1_3: ( + _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 + ), + ssl.TLSVersion.MAXIMUM_SUPPORTED: ( + _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 + ), +} +_openssl_to_ssl_maximum_version: dict[int, int] = { + ssl.TLSVersion.MINIMUM_SUPPORTED: ( + _OP_NO_SSLv2_OR_SSLv3 + | _OP_NO_TLSv1 + | _OP_NO_TLSv1_1 + | _OP_NO_TLSv1_2 + | _OP_NO_TLSv1_3 + ), + ssl.TLSVersion.TLSv1: ( + _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3 + ), + ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3, + ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3, + ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3, + ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3, +} + +# OpenSSL will only write 16K at a time +SSL_WRITE_BLOCKSIZE = 16384 + +orig_util_SSLContext = util.ssl_.SSLContext + + +log = logging.getLogger(__name__) + + +def inject_into_urllib3() -> None: + "Monkey-patch urllib3 with PyOpenSSL-backed SSL-support." + + _validate_dependencies_met() + + util.SSLContext = PyOpenSSLContext # type: ignore[assignment] + util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment] + util.IS_PYOPENSSL = True + util.ssl_.IS_PYOPENSSL = True + + +def extract_from_urllib3() -> None: + "Undo monkey-patching by :func:`inject_into_urllib3`." + + util.SSLContext = orig_util_SSLContext + util.ssl_.SSLContext = orig_util_SSLContext + util.IS_PYOPENSSL = False + util.ssl_.IS_PYOPENSSL = False + + +def _validate_dependencies_met() -> None: + """ + Verifies that PyOpenSSL's package-level dependencies have been met. + Throws `ImportError` if they are not met. + """ + # Method added in `cryptography==1.1`; not available in older versions + from cryptography.x509.extensions import Extensions + + if getattr(Extensions, "get_extension_for_class", None) is None: + raise ImportError( + "'cryptography' module missing required functionality. " + "Try upgrading to v1.3.4 or newer." + ) + + # pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509 + # attribute is only present on those versions. + from OpenSSL.crypto import X509 + + x509 = X509() + if getattr(x509, "_x509", None) is None: + raise ImportError( + "'pyOpenSSL' module missing required functionality. " + "Try upgrading to v0.14 or newer." + ) + + +def _dnsname_to_stdlib(name: str) -> str | None: + """ + Converts a dNSName SubjectAlternativeName field to the form used by the + standard library on the given Python version. + + Cryptography produces a dNSName as a unicode string that was idna-decoded + from ASCII bytes. We need to idna-encode that string to get it back, and + then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib + uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8). 
+ + If the name cannot be idna-encoded then we return None signalling that + the name given should be skipped. + """ + + def idna_encode(name: str) -> bytes | None: + """ + Borrowed wholesale from the Python Cryptography Project. It turns out + that we can't just safely call `idna.encode`: it can explode for + wildcard names. This avoids that problem. + """ + import idna + + try: + for prefix in ["*.", "."]: + if name.startswith(prefix): + name = name[len(prefix) :] + return prefix.encode("ascii") + idna.encode(name) + return idna.encode(name) + except idna.core.IDNAError: + return None + + # Don't send IPv6 addresses through the IDNA encoder. + if ":" in name: + return name + + encoded_name = idna_encode(name) + if encoded_name is None: + return None + return encoded_name.decode("utf-8") + + +def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]: + """ + Given an PyOpenSSL certificate, provides all the subject alternative names. + """ + cert = peer_cert.to_cryptography() + + # We want to find the SAN extension. Ask Cryptography to locate it (it's + # faster than looping in Python) + try: + ext = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName).value + except x509.ExtensionNotFound: + # No such extension, return the empty list. + return [] + except ( + x509.DuplicateExtension, + UnsupportedExtension, + x509.UnsupportedGeneralNameType, + UnicodeError, + ) as e: + # A problem has been found with the quality of the certificate. Assume + # no SAN field is present. + log.warning( + "A problem was encountered with the certificate that prevented " + "urllib3 from finding the SubjectAlternativeName field. This can " + "affect certificate validation. The error was %s", + e, + ) + return [] + + # We want to return dNSName and iPAddress fields. We need to cast the IPs + # back to strings because the match_hostname function wants them as + # strings. + # Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8 + # decoded. This is pretty frustrating, but that's what the standard library + # does with certificates, and so we need to attempt to do the same. + # We also want to skip over names which cannot be idna encoded. 
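+ # (_dnsname_to_stdlib returns None for such names, and the list
+ # comprehension below filters the Nones out.)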
+ names = [ + ("DNS", name) + for name in map(_dnsname_to_stdlib, ext.get_values_for_type(x509.DNSName)) + if name is not None + ] + names.extend( + ("IP Address", str(name)) for name in ext.get_values_for_type(x509.IPAddress) + ) + + return names + + +class WrappedSocket: + """API-compatibility wrapper for Python OpenSSL's Connection-class.""" + + def __init__( + self, + connection: OpenSSL.SSL.Connection, + socket: socket_cls, + suppress_ragged_eofs: bool = True, + ) -> None: + self.connection = connection + self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._io_refs = 0 + self._closed = False + + def fileno(self) -> int: + return self.socket.fileno() + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self) -> None: + if self._io_refs > 0: + self._io_refs -= 1 + if self._closed: + self.close() + + def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes: + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"): + return b"" + else: + raise OSError(e.args[0], str(e)) from e + except OpenSSL.SSL.ZeroReturnError: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b"" + else: + raise + except OpenSSL.SSL.WantReadError as e: + if not util.wait_for_read(self.socket, self.socket.gettimeout()): + raise timeout("The read operation timed out") from e + else: + return self.recv(*args, **kwargs) + + # TLS 1.3 post-handshake authentication + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"read error: {e!r}") from e + else: + return data # type: ignore[no-any-return] + + def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int: + try: + return self.connection.recv_into(*args, **kwargs) # type: ignore[no-any-return] + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"): + return 0 + else: + raise OSError(e.args[0], str(e)) from e + except OpenSSL.SSL.ZeroReturnError: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return 0 + else: + raise + except OpenSSL.SSL.WantReadError as e: + if not util.wait_for_read(self.socket, self.socket.gettimeout()): + raise timeout("The read operation timed out") from e + else: + return self.recv_into(*args, **kwargs) + + # TLS 1.3 post-handshake authentication + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"read error: {e!r}") from e + + def settimeout(self, timeout: float) -> None: + return self.socket.settimeout(timeout) + + def _send_until_done(self, data: bytes) -> int: + while True: + try: + return self.connection.send(data) # type: ignore[no-any-return] + except OpenSSL.SSL.WantWriteError as e: + if not util.wait_for_write(self.socket, self.socket.gettimeout()): + raise timeout() from e + continue + except OpenSSL.SSL.SysCallError as e: + raise OSError(e.args[0], str(e)) from e + + def sendall(self, data: bytes) -> None: + total_sent = 0 + while total_sent < len(data): + sent = self._send_until_done( + data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE] + ) + total_sent += sent + + def shutdown(self, how: int) -> None: + try: + self.connection.shutdown() + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"shutdown error: {e!r}") from e + + def close(self) -> None: + self._closed = True + if self._io_refs <= 0: + self._real_close() + + def _real_close(self) -> None: + try: + return self.connection.close() # type: ignore[no-any-return] + except OpenSSL.SSL.Error: + return + + 
def getpeercert( + self, binary_form: bool = False + ) -> dict[str, list[typing.Any]] | None: + x509 = self.connection.get_peer_certificate() + + if not x509: + return x509 # type: ignore[no-any-return] + + if binary_form: + return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) # type: ignore[no-any-return] + + return { + "subject": ((("commonName", x509.get_subject().CN),),), # type: ignore[dict-item] + "subjectAltName": get_subj_alt_name(x509), + } + + def version(self) -> str: + return self.connection.get_protocol_version_name() # type: ignore[no-any-return] + + def selected_alpn_protocol(self) -> str | None: + alpn_proto = self.connection.get_alpn_proto_negotiated() + return alpn_proto.decode() if alpn_proto else None + + +WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined] + + +class PyOpenSSLContext: + """ + I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible + for translating the interface of the standard library ``SSLContext`` object + to calls into PyOpenSSL. + """ + + def __init__(self, protocol: int) -> None: + self.protocol = _openssl_versions[protocol] + self._ctx = OpenSSL.SSL.Context(self.protocol) + self._options = 0 + self.check_hostname = False + self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED + self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED + + @property + def options(self) -> int: + return self._options + + @options.setter + def options(self, value: int) -> None: + self._options = value + self._set_ctx_options() + + @property + def verify_mode(self) -> int: + return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()] + + @verify_mode.setter + def verify_mode(self, value: ssl.VerifyMode) -> None: + self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback) + + def set_default_verify_paths(self) -> None: + self._ctx.set_default_verify_paths() + + def set_ciphers(self, ciphers: bytes | str) -> None: + if isinstance(ciphers, str): + ciphers = ciphers.encode("utf-8") + self._ctx.set_cipher_list(ciphers) + + def load_verify_locations( + self, + cafile: str | None = None, + capath: str | None = None, + cadata: bytes | None = None, + ) -> None: + if cafile is not None: + cafile = cafile.encode("utf-8") # type: ignore[assignment] + if capath is not None: + capath = capath.encode("utf-8") # type: ignore[assignment] + try: + self._ctx.load_verify_locations(cafile, capath) + if cadata is not None: + self._ctx.load_verify_locations(BytesIO(cadata)) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e + + def load_cert_chain( + self, + certfile: str, + keyfile: str | None = None, + password: str | None = None, + ) -> None: + try: + self._ctx.use_certificate_chain_file(certfile) + if password is not None: + if not isinstance(password, bytes): + password = password.encode("utf-8") # type: ignore[assignment] + self._ctx.set_passwd_cb(lambda *_: password) + self._ctx.use_privatekey_file(keyfile or certfile) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e + + def set_alpn_protocols(self, protocols: list[bytes | str]) -> None: + protocols = [util.util.to_bytes(p, "ascii") for p in protocols] + return self._ctx.set_alpn_protos(protocols) # type: ignore[no-any-return] + + def wrap_socket( + self, + sock: socket_cls, + server_side: bool = False, + do_handshake_on_connect: bool = True, + suppress_ragged_eofs: bool = True, + server_hostname: bytes | str | None = None, + ) 
-> WrappedSocket: + cnx = OpenSSL.SSL.Connection(self._ctx, sock) + + # If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3 + if server_hostname and not util.ssl_.is_ipaddress(server_hostname): + if isinstance(server_hostname, str): + server_hostname = server_hostname.encode("utf-8") + cnx.set_tlsext_host_name(server_hostname) + + cnx.set_connect_state() + + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError as e: + if not util.wait_for_read(sock, sock.gettimeout()): + raise timeout("select timed out") from e + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"bad handshake: {e!r}") from e + break + + return WrappedSocket(cnx, sock) + + def _set_ctx_options(self) -> None: + self._ctx.set_options( + self._options + | _openssl_to_ssl_minimum_version[self._minimum_version] + | _openssl_to_ssl_maximum_version[self._maximum_version] + ) + + @property + def minimum_version(self) -> int: + return self._minimum_version + + @minimum_version.setter + def minimum_version(self, minimum_version: int) -> None: + self._minimum_version = minimum_version + self._set_ctx_options() + + @property + def maximum_version(self) -> int: + return self._maximum_version + + @maximum_version.setter + def maximum_version(self, maximum_version: int) -> None: + self._maximum_version = maximum_version + self._set_ctx_options() + + +def _verify_callback( + cnx: OpenSSL.SSL.Connection, + x509: X509, + err_no: int, + err_depth: int, + return_code: int, +) -> bool: + return err_no == 0 diff --git a/.venv/lib/python3.12/site-packages/urllib3/contrib/socks.py b/.venv/lib/python3.12/site-packages/urllib3/contrib/socks.py new file mode 100644 index 00000000..c62b5e03 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/urllib3/contrib/socks.py @@ -0,0 +1,228 @@ +""" +This module contains provisional support for SOCKS proxies from within +urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and +SOCKS5. To enable its functionality, either install PySocks or install this +module with the ``socks`` extra. + +The SOCKS implementation supports the full range of urllib3 features. It also +supports the following SOCKS features: + +- SOCKS4A (``proxy_url='socks4a://...``) +- SOCKS4 (``proxy_url='socks4://...``) +- SOCKS5 with remote DNS (``proxy_url='socks5h://...``) +- SOCKS5 with local DNS (``proxy_url='socks5://...``) +- Usernames and passwords for the SOCKS proxy + +.. note:: + It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in + your ``proxy_url`` to ensure that DNS resolution is done from the remote + server instead of client-side when connecting to a domain name. + +SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 +supports IPv4, IPv6, and domain names. + +When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` +will be sent as the ``userid`` section of the SOCKS request: + +.. code-block:: python + + proxy_url="socks4a://<userid>@proxy-host" + +When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion +of the ``proxy_url`` will be sent as the username/password to authenticate +with the proxy: + +.. 
code-block:: python + + proxy_url="socks5h://<username>:<password>@proxy-host" + +""" + +from __future__ import annotations + +try: + import socks # type: ignore[import-not-found] +except ImportError: + import warnings + + from ..exceptions import DependencyWarning + + warnings.warn( + ( + "SOCKS support in urllib3 requires the installation of optional " + "dependencies: specifically, PySocks. For more information, see " + "https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies" + ), + DependencyWarning, + ) + raise + +import typing +from socket import timeout as SocketTimeout + +from ..connection import HTTPConnection, HTTPSConnection +from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from ..exceptions import ConnectTimeoutError, NewConnectionError +from ..poolmanager import PoolManager +from ..util.url import parse_url + +try: + import ssl +except ImportError: + ssl = None # type: ignore[assignment] + + +class _TYPE_SOCKS_OPTIONS(typing.TypedDict): + socks_version: int + proxy_host: str | None + proxy_port: str | None + username: str | None + password: str | None + rdns: bool + + +class SOCKSConnection(HTTPConnection): + """ + A plain-text HTTP connection that connects via a SOCKS proxy. + """ + + def __init__( + self, + _socks_options: _TYPE_SOCKS_OPTIONS, + *args: typing.Any, + **kwargs: typing.Any, + ) -> None: + self._socks_options = _socks_options + super().__init__(*args, **kwargs) + + def _new_conn(self) -> socks.socksocket: + """ + Establish a new connection via the SOCKS proxy. + """ + extra_kw: dict[str, typing.Any] = {} + if self.source_address: + extra_kw["source_address"] = self.source_address + + if self.socket_options: + extra_kw["socket_options"] = self.socket_options + + try: + conn = socks.create_connection( + (self.host, self.port), + proxy_type=self._socks_options["socks_version"], + proxy_addr=self._socks_options["proxy_host"], + proxy_port=self._socks_options["proxy_port"], + proxy_username=self._socks_options["username"], + proxy_password=self._socks_options["password"], + proxy_rdns=self._socks_options["rdns"], + timeout=self.timeout, + **extra_kw, + ) + + except SocketTimeout as e: + raise ConnectTimeoutError( + self, + f"Connection to {self.host} timed out. (connect timeout={self.timeout})", + ) from e + + except socks.ProxyError as e: + # This is fragile as hell, but it seems to be the only way to raise + # useful errors here. + if e.socket_err: + error = e.socket_err + if isinstance(error, SocketTimeout): + raise ConnectTimeoutError( + self, + f"Connection to {self.host} timed out. (connect timeout={self.timeout})", + ) from e + else: + # Adding `from e` messes with coverage somehow, so it's omitted. + # See #2386. + raise NewConnectionError( + self, f"Failed to establish a new connection: {error}" + ) + else: + raise NewConnectionError( + self, f"Failed to establish a new connection: {e}" + ) from e + + except OSError as e: # Defensive: PySocks should catch all these. + raise NewConnectionError( + self, f"Failed to establish a new connection: {e}" + ) from e + + return conn + + +# We don't need to duplicate the Verified/Unverified distinction from +# urllib3/connection.py here because the HTTPSConnection will already have been +# correctly set to either the Verified or Unverified form by that module. This +# means the SOCKSHTTPSConnection will automatically be the correct type. 
+class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): + pass + + +class SOCKSHTTPConnectionPool(HTTPConnectionPool): + ConnectionCls = SOCKSConnection + + +class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): + ConnectionCls = SOCKSHTTPSConnection + + +class SOCKSProxyManager(PoolManager): + """ + A version of the urllib3 ProxyManager that routes connections via the + defined SOCKS proxy. + """ + + pool_classes_by_scheme = { + "http": SOCKSHTTPConnectionPool, + "https": SOCKSHTTPSConnectionPool, + } + + def __init__( + self, + proxy_url: str, + username: str | None = None, + password: str | None = None, + num_pools: int = 10, + headers: typing.Mapping[str, str] | None = None, + **connection_pool_kw: typing.Any, + ): + parsed = parse_url(proxy_url) + + if username is None and password is None and parsed.auth is not None: + split = parsed.auth.split(":") + if len(split) == 2: + username, password = split + if parsed.scheme == "socks5": + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = False + elif parsed.scheme == "socks5h": + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = True + elif parsed.scheme == "socks4": + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = False + elif parsed.scheme == "socks4a": + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = True + else: + raise ValueError(f"Unable to determine SOCKS version from {proxy_url}") + + self.proxy_url = proxy_url + + socks_options = { + "socks_version": socks_version, + "proxy_host": parsed.host, + "proxy_port": parsed.port, + "username": username, + "password": password, + "rdns": rdns, + } + connection_pool_kw["_socks_options"] = socks_options + + super().__init__(num_pools, headers, **connection_pool_kw) + + self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme
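
For orientation, a minimal usage sketch of the SOCKSProxyManager defined in the diff above. This sketch is not part of the diff itself; it assumes PySocks is installed, and the proxy address, credentials, and target URL are illustrative placeholders.

from urllib3.contrib.socks import SOCKSProxyManager

# "socks5h" performs DNS resolution on the proxy side, as the module
# docstring recommends; user, secret, and 127.0.0.1:1080 are placeholders.
proxy = SOCKSProxyManager("socks5h://user:secret@127.0.0.1:1080")

# Once constructed, the manager behaves like a regular urllib3 PoolManager.
response = proxy.request("GET", "https://example.org/")
print(response.status)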