From aff951b7a41eb5b958b32c49eaa00da02adc9c2d Mon Sep 17 00:00:00 2001
From: Quentin Pradet
Date: Tue, 21 Jan 2020 22:32:56 +0400
Subject: [PATCH] Optimize _encode_invalid_chars (#1787)

Co-authored-by: Seth Michael Larson
Upstream-Status: Backport [from git://github.com/urllib3/urllib3.git commit:a2697e7c6b]
Signed-off-by: Haiqing Bai
---
 src/urllib3/util/url.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py
index 9675f74..e353937 100644
--- a/src/urllib3/util/url.py
+++ b/src/urllib3/util/url.py
@@ -216,18 +216,15 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
 
     component = six.ensure_text(component)
 
+    # Normalize existing percent-encoded bytes.
     # Try to see if the component we're encoding is already percent-encoded
     # so we can skip all '%' characters but still encode all others.
-    percent_encodings = PERCENT_RE.findall(component)
-
-    # Normalize existing percent-encoded bytes.
-    for enc in percent_encodings:
-        if not enc.isupper():
-            component = component.replace(enc, enc.upper())
+    component, percent_encodings = PERCENT_RE.subn(
+        lambda match: match.group(0).upper(), component
+    )
 
     uri_bytes = component.encode("utf-8", "surrogatepass")
-    is_percent_encoded = len(percent_encodings) == uri_bytes.count(b"%")
-
+    is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
     encoded_component = bytearray()
 
     for i in range(0, len(uri_bytes)):
@@ -237,7 +234,7 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
         if (is_percent_encoded and byte == b"%") or (
             byte_ord < 128 and byte.decode() in allowed_chars
         ):
-            encoded_component.extend(byte)
+            encoded_component += byte
             continue
         encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))
 
-- 
2.23.0