meta/recipes-core/glib-2.0/glib-2.0/glib-mkenums-replace-and-warn-decoding.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104

From ba043ef4f2c713662f89425aed70dfd78e3955ee Mon Sep 17 00:00:00 2001
From: Patrick Welche <prlw1@cam.ac.uk>
Date: Mon, 23 Oct 2017 13:59:58 +0100
Subject: [PATCH] glib-mkenums: best effort attempt on non-utf8 encoded files.

Some source files aren't valid UTF-8; they contain, for example,
ISO-8859-1 accented characters in authors' names.
Replace invalid data with a replacement '?' character and print a
warning to keep things working.
Based on a patch from Christoph Reiter in
https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20

Upstream-Status: Submitted [https://bug785113.bugzilla-attachments.gnome.org/attachment.cgi?id=362098]

Author: Patrick Welche <prlw1@cam.ac.uk>

Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
---
 gobject/glib-mkenums.in | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/gobject/glib-mkenums.in b/gobject/glib-mkenums.in
index 7cc55053c..9790a65a2 100755
--- a/gobject/glib-mkenums.in
+++ b/gobject/glib-mkenums.in
@@ -26,14 +26,6 @@ the GNU General Public License which can be found in the
 GLib source package. Sources, examples and contact
 information are available at http://www.gtk.org'''
 
-# Python 2 defaults to ASCII in case stdout is redirected.
-# This should make it match Python 3, which uses the locale encoding.
-if sys.stdout.encoding is None:
-    output_stream = codecs.getwriter(
-        locale.getpreferredencoding())(sys.stdout)
-else:
-    output_stream = sys.stdout
-
 # pylint: disable=too-few-public-methods
 class Color:
     '''ANSI Terminal colors'''
@@ -81,6 +73,31 @@ def write_output(output):
     global output_stream
     print(output, file=output_stream)
 
+
+# Python 2 defaults to ASCII in case stdout is redirected.
+# This should make it match Python 3, which uses the locale encoding.
+if sys.stdout.encoding is None:
+    output_stream = codecs.getwriter(
+        locale.getpreferredencoding())(sys.stdout)
+else:
+    output_stream = sys.stdout
+
+
+# Some source files aren't utf-8 and the old perl version didn't care.
+# Replace invalid data with a replacement character to keep things working.
+# https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
+decoding_errors = "replace_and_warn"
+
+def replace_and_warn(err):
+    '''Codec error handler: warn (with <=7 chars of context) and emit "?".'''
+    context = err.object[max(err.start - 7, 0):err.end + 7]  # clamp: no wrap
+    print_warning('UnicodeWarning: {} at {} ({})'.format(
+        err.reason, err.start, context))
+    return ('?', err.end)
+
+codecs.register_error('replace_and_warn', replace_and_warn)
+
+
 # glib-mkenums.py
 # Information about the current enumeration
 flags = None # Is enumeration a bitmask?
@@ -157,7 +174,8 @@ def parse_entries(file, file_name):
         m = re.match(r'\#include\s*<([^>]*)>', line)
         if m:
             newfilename = os.path.join("..", m.group(1))
-            newfile = io.open(newfilename, encoding="utf-8")
+            newfile = io.open(newfilename, encoding="utf-8",
+                              errors=decoding_errors)
 
             if not parse_entries(newfile, newfilename):
                 return False
@@ -253,7 +271,7 @@ def read_template_file(file):
            }
     in_ = 'junk'
 
-    ifile = io.open(file, encoding="utf-8")
+    ifile = io.open(file, encoding="utf-8", errors=decoding_errors)
     for line in ifile:
         m = re.match(r'\/\*\*\*\s+(BEGIN|END)\s+([\w-]+)\s+\*\*\*\/', line)
         if m:
@@ -408,7 +426,8 @@ def process_file(curfilename):
     firstenum = True
 
     try:
-        curfile = io.open(curfilename, encoding="utf-8")
+        curfile = io.open(curfilename, encoding="utf-8",
+                          errors=decoding_errors)
     except IOError as e:
         if e.errno == errno.ENOENT:
             print_warning('No file "{}" found.'.format(curfilename))
-- 
2.14.2