about summary refs log tree commit diff stats
path: root/lib/bs4/dammit.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/bs4/dammit.py')
-rw-r--r-- lib/bs4/dammit.py 20
1 files changed, 6 insertions, 14 deletions
diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py
index 68d419feb..7ad9e0dd1 100644
--- a/lib/bs4/dammit.py
+++ b/lib/bs4/dammit.py
@@ -8,12 +8,10 @@ XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
__license__ = "MIT"
-from pdb import set_trace
import codecs
from html.entities import codepoint2name
import re
import logging
-import string
# Import a library to autodetect character encodings.
chardet_type = None
@@ -38,16 +36,10 @@ except ImportError:
def chardet_dammit(s):
return None
-# Available from http://cjkpython.i18n.org/.
-try:
- import iconv_codec
-except ImportError:
- pass
-
xml_encoding_re = re.compile(
- '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+ r'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
html_meta_re = re.compile(
- '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+ r'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):
@@ -80,11 +72,11 @@ class EntitySubstitution(object):
">": "gt",
}
- BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
- ")")
+ BARE_AMPERSAND_OR_BRACKET = re.compile(r"([<>]|"
+ r"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+ r")")
- AMPERSAND_OR_BRACKET = re.compile("([<>&])")
+ AMPERSAND_OR_BRACKET = re.compile(r"([<>&])")
@classmethod
def _substitute_html_entity(cls, matchobj):