1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
From ba043ef4f2c713662f89425aed70dfd78e3955ee Mon Sep 17 00:00:00 2001
From: Patrick Welche <prlw1@cam.ac.uk>
Date: Mon, 23 Oct 2017 13:59:58 +0100
Subject: [PATCH] glib-mkenums: best effort attempt on non-utf8 encoded files.
Some source files aren't valid utf-8 containing for example
iso8859-1 accented characters in author's names.
Replace invalid data with a replacement '?' character and print a
warning to keep things working.
Based on a patch from Christoph Reiter in
https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
Upstream-Status: Submitted [https://bug785113.bugzilla-attachments.gnome.org/attachment.cgi?id=362098]
Author: Patrick Welche <prlw1@cam.ac.uk>
Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
---
gobject/glib-mkenums.in | 41 ++++++++++++++++++++++++++++++-----------
1 file changed, 30 insertions(+), 11 deletions(-)
diff --git a/gobject/glib-mkenums.in b/gobject/glib-mkenums.in
index 7cc55053c..9790a65a2 100755
--- a/gobject/glib-mkenums.in
+++ b/gobject/glib-mkenums.in
@@ -26,14 +26,6 @@ the GNU General Public License which can be found in the
GLib source package. Sources, examples and contact
information are available at http://www.gtk.org'''
-# Python 2 defaults to ASCII in case stdout is redirected.
-# This should make it match Python 3, which uses the locale encoding.
-if sys.stdout.encoding is None:
- output_stream = codecs.getwriter(
- locale.getpreferredencoding())(sys.stdout)
-else:
- output_stream = sys.stdout
-
# pylint: disable=too-few-public-methods
class Color:
'''ANSI Terminal colors'''
@@ -81,6 +73,31 @@ def write_output(output):
global output_stream
print(output, file=output_stream)
+
+# Python 2 defaults to ASCII in case stdout is redirected.
+# This should make it match Python 3, which uses the locale encoding.
+if sys.stdout.encoding is None:
+ output_stream = codecs.getwriter(
+ locale.getpreferredencoding())(sys.stdout)
+else:
+ output_stream = sys.stdout
+
+
+# Some source files aren't utf-8 and the old perl version didn't care.
+# Replace invalid data with a replacement character to keep things working.
+# https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
+decoding_errors = "replace_and_warn"
+
+def replace_and_warn(err):
+ # 7 characters of context either side of the offending character
+ print_warning('UnicodeWarning: {} at {} ({})'.format(
+ err.reason, err.start,
+ err.object[err.start - 7:err.end + 7]))
+ return ('?', err.end)
+
+codecs.register_error('replace_and_warn', replace_and_warn)
+
+
# glib-mkenums.py
# Information about the current enumeration
flags = None # Is enumeration a bitmask?
@@ -157,7 +174,8 @@ def parse_entries(file, file_name):
m = re.match(r'\#include\s*<([^>]*)>', line)
if m:
newfilename = os.path.join("..", m.group(1))
- newfile = io.open(newfilename, encoding="utf-8")
+ newfile = io.open(newfilename, encoding="utf-8",
+ errors=decoding_errors)
if not parse_entries(newfile, newfilename):
return False
@@ -253,7 +271,7 @@ def read_template_file(file):
}
in_ = 'junk'
- ifile = io.open(file, encoding="utf-8")
+ ifile = io.open(file, encoding="utf-8", errors=decoding_errors)
for line in ifile:
m = re.match(r'\/\*\*\*\s+(BEGIN|END)\s+([\w-]+)\s+\*\*\*\/', line)
if m:
@@ -408,7 +426,8 @@ def process_file(curfilename):
firstenum = True
try:
- curfile = io.open(curfilename, encoding="utf-8")
+ curfile = io.open(curfilename, encoding="utf-8",
+ errors=decoding_errors)
except IOError as e:
if e.errno == errno.ENOENT:
print_warning('No file "{}" found.'.format(curfilename))
--
2.14.2
|