aboutsummaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/perl/perl/CVE-2018-18314.patch
blob: e84e7bc4e4c2b41a29403896ffe23b2d1555bb40 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
From 6a2d07f43ae7cfcb2eb30cf39751f2f7fed7ecc1 Mon Sep 17 00:00:00 2001
From: Yves Orton <demerphq@gmail.com>
Date: Mon, 26 Jun 2017 13:19:55 +0200
Subject: [PATCH 3/3] fix #131649 - extended charclass can trigger assert

The extended charclass parser makes some assumptions during the
first pass which are only true on well structured input, and it
does not properly catch various errors. later on the code assumes
that things the first pass will let through are valid, when in
fact they should trigger errors.

(cherry picked from commit 19a498a461d7c81ae3507c450953d1148efecf4f)

CVE: CVE-2018-18314
Upstream-Status: Backport
[https://perl5.git.perl.org/perl.git/commit/dabe076af345ab4512ea80245b4e4cd7ec0996cd]

Signed-off-by: Dan Tran <dantran@microsoft.com>
---
 pod/perldiag.pod        | 27 ++++++++++++++++++++++++++-
 pod/perlrecharclass.pod |  4 ++--
 regcomp.c               | 23 +++++++++++++----------
 t/lib/warnings/regcomp  |  6 +++---
 t/re/reg_mesg.t         | 29 ++++++++++++++++-------------
 t/re/regex_sets.t       |  6 +++---
 6 files changed, 63 insertions(+), 32 deletions(-)

diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 737d3633f6..644b814008 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -5777,7 +5777,7 @@ yourself.
 a perl4 interpreter, especially if the next 2 tokens are "use strict"
 or "my $var" or "our $var".
 
-=item Syntax error in (?[...]) in regex m/%s/
+=item Syntax error in (?[...]) in regex; marked by <-- HERE in m/%s/
 
 (F) Perl could not figure out what you meant inside this construct; this
 notifies you that it is giving up trying.
@@ -6153,6 +6153,31 @@ for example,
 (F) The unexec() routine failed for some reason.  See your local FSF
 representative, who probably put it there in the first place.
 
+=item Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/%s/
+
+(F) While parsing an extended character class a ']' character was encountered
+at a point in the definition where the only legal use of ']' is to close the
+character class definition as part of a '])', you may have forgotten the close
+paren, or otherwise confused the parser.
+
+=item Expecting close paren for nested extended charclass in regex; marked by <-- HERE in m/%s/
+
+(F) While parsing a nested extended character class like:
+
+    (?[ ... (?flags:(?[ ... ])) ... ])
+                             ^
+
+we expected to see a close paren ')' (marked by ^) but did not.
+
+=item Expecting close paren for wrapper for nested extended charclass in regex; marked by <-- HERE in m/%s/
+
+(F) While parsing a nested extended character class like:
+
+    (?[ ... (?flags:(?[ ... ])) ... ])
+                              ^
+
+we expected to see a close paren ')' (marked by ^) but did not.
+
 =item Unexpected binary operator '%c' with no preceding operand in regex;
 marked by S<<-- HERE> in m/%s/
 
diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod
index 89f4a7ef3f..a557cc0384 100644
--- a/pod/perlrecharclass.pod
+++ b/pod/perlrecharclass.pod
@@ -1101,8 +1101,8 @@ hence both of the following work:
 Any contained POSIX character classes, including things like C<\w> and C<\D>
 respect the C<E<sol>a> (and C<E<sol>aa>) modifiers.
 
-C<< (?[ ]) >> is a regex-compile-time construct.  Any attempt to use
-something which isn't knowable at the time the containing regular
+Note that C<< (?[ ]) >> is a regex-compile-time construct.  Any attempt
+to use something which isn't knowable at the time the containing regular
 expression is compiled is a fatal error.  In practice, this means
 just three limitations:
 
diff --git a/regcomp.c b/regcomp.c
index 2688979882..cb8409ed27 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -14609,8 +14609,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                     TRUE /* Force /x */ );
 
             switch (*RExC_parse) {
-                case '?':
-                    if (RExC_parse[1] == '[') depth++, RExC_parse++;
+                case '(':
+                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+                        depth++, RExC_parse+=2;
                     /* FALLTHROUGH */
                 default:
                     break;
@@ -14667,9 +14668,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                 }
 
                 case ']':
-                    if (depth--) break;
-                    RExC_parse++;
-                    if (*RExC_parse == ')') {
+                    if (RExC_parse[1] == ')') {
+                        RExC_parse++;
+                        if (depth--) break;
                         node = reganode(pRExC_state, ANYOF, 0);
                         RExC_size += ANYOF_SKIP;
                         nextchar(pRExC_state);
@@ -14681,20 +14682,20 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
                         return node;
                     }
-                    goto no_close;
+                    RExC_parse++;
+                    vFAIL("Unexpected ']' with no following ')' in (?[...");
             }
 
             RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
         }
 
-      no_close:
         /* We output the messages even if warnings are off, because we'll fail
          * the very next thing, and these give a likely diagnosis for that */
         if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
             output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
         }
 
-        FAIL("Syntax error in (?[...])");
+        vFAIL("Syntax error in (?[...])");
     }
 
     /* Pass 2 only after this. */
@@ -14868,12 +14869,14 @@ redo_curchar:
                      * inversion list, and RExC_parse points to the trailing
                      * ']'; the next character should be the ')' */
                     RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for nested extended charclass");
 
                     /* Then the ')' matching the original '(' handled by this
                      * case: statement */
                     RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
 
                     RExC_flags = save_flags;
                     goto handle_operand;
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index 08cb27b00f..367276d0fc 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -59,21 +59,21 @@ Unmatched [ in regex; marked by <-- HERE in m/abc[ <-- HERE fi[.00./ at - line
 qr/(?[[[:word]]])/;
 EXPECT
 Assuming NOT a POSIX class since there is no terminating ':' in regex; marked by <-- HERE in m/(?[[[:word <-- HERE ]]])/ at - line 2.
-syntax error in (?[...]) in regex m/(?[[[:word]]])/ at - line 2.
+Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/(?[[[:word]] <-- HERE ])/ at - line 2.
 ########
 # NAME qr/(?[ [[:digit: ])/
 # OPTION fatal
 qr/(?[[[:digit: ])/;
 EXPECT
 Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[[:digit: ] <-- HERE )/ at - line 2.
-syntax error in (?[...]) in regex m/(?[[[:digit: ])/ at - line 2.
+syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[[:digit: ]) <-- HERE / at - line 2.
 ########
 # NAME qr/(?[ [:digit: ])/
 # OPTION fatal
 qr/(?[[:digit: ])/
 EXPECT
 Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[:digit: ] <-- HERE )/ at - line 2.
-syntax error in (?[...]) in regex m/(?[[:digit: ])/ at - line 2.
+syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[:digit: ]) <-- HERE / at - line 2.
 ########
 # NAME [perl #126141]
 # OPTION fatal
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index 658397ac27..08a3688e1d 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -202,8 +202,9 @@ my @death =
  '/\b{gc}/' => "'gc' is an unknown bound type {#} m/\\b{gc{#}}/",
  '/\B{gc}/' => "'gc' is an unknown bound type {#} m/\\B{gc{#}}/",
 
- '/(?[[[::]]])/' => "Syntax error in (?[...]) in regex m/(?[[[::]]])/",
- '/(?[[[:w:]]])/' => "Syntax error in (?[...]) in regex m/(?[[[:w:]]])/",
+
+ '/(?[[[::]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[::]]{#}])/",
+ '/(?[[[:w:]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[:w:]]{#}])/",
  '/(?[[:w:]])/' => "",
  '/[][[:alpha:]]' => "",    # [perl #127581]
  '/([.].*)[.]/'   => "",    # [perl #127582]
@@ -227,11 +228,12 @@ my @death =
  '/(?[ \p{foo} ])/' => 'Can\'t find Unicode property definition "foo" {#} m/(?[ \p{foo}{#} ])/',
  '/(?[ \p{ foo = bar } ])/' => 'Can\'t find Unicode property definition "foo = bar" {#} m/(?[ \p{ foo = bar }{#} ])/',
  '/(?[ \8 ])/' => 'Unrecognized escape \8 in character class {#} m/(?[ \8{#} ])/',
- '/(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ]/',
- '/(?[ [ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ \t ]/',
- '/(?[ \t ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ] ]/',
- '/(?[ [ ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ ] ]/',
- '/(?[ \t + \e # This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # This was supposed to be a comment ])/',
+ '/(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#}/",
+ '/(?[ [ \t ]/' => "Syntax error in (?[...]) {#} m/(?[ [ \\t ]{#}/",
+ '/(?[ \t ] ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#} ]/",
+ '/(?[ [ ] ]/' => "Syntax error in (?[...]) {#} m/(?[ [ ] ]{#}/",
+ '/(?[ \t + \e # This was supposed to be a comment ])/' =>
+    "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # This was supposed to be a comment ]){#}/",
  '/(?[ ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ {#}])/',
  'm/(?[[a-\d]])/' => 'False [] range "a-\d" {#} m/(?[[a-\d{#}]])/',
  'm/(?[[\w-x]])/' => 'False [] range "\w-" {#} m/(?[[\w-{#}x]])/',
@@ -410,10 +412,10 @@ my @death_utf8 = mark_as_utf8(
 
  '/ネ\p{}ネ/' => 'Empty \p{} {#} m/ネ\p{{#}}ネ/',
 
- '/ネ(?[[[:ネ]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ]]])ネ/",
- '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ: ])ネ/",
- '/ネ(?[[[::]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[::]]])ネ/",
- '/ネ(?[[[:ネ:]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ:]]])ネ/",
+ '/ネ(?[[[:ネ]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ]]{#}])ネ/",
+ '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) {#} m/ネ(?[[[:ネ: ])ネ{#}/",
+ '/ネ(?[[[::]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[::]]{#}])ネ/",
+ '/ネ(?[[[:ネ:]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ:]]{#}])ネ/",
  '/ネ(?[[:ネ:]])ネ/' => "",
  '/ネ(?[ネ])ネ/' =>  'Unexpected character {#} m/ネ(?[ネ{#}])ネ/',
  '/ネ(?[ + [ネ] ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/ネ(?[ +{#} [ネ] ])/',
@@ -426,8 +428,9 @@ my @death_utf8 = mark_as_utf8(
  '/(?[ \x{ネ} ])ネ/' => 'Non-hex character {#} m/(?[ \x{ネ{#}} ])ネ/',
  '/(?[ \p{ネ} ])/' => 'Can\'t find Unicode property definition "ネ" {#} m/(?[ \p{ネ}{#} ])/',
  '/(?[ \p{ ネ = bar } ])/' => 'Can\'t find Unicode property definition "ネ = bar" {#} m/(?[ \p{ ネ = bar }{#} ])/',
- '/ネ(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/ネ(?[ \t ]/',
- '/(?[ \t + \e # ネ This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # ネ This was supposed to be a comment ])/',
+ '/ネ(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[ \\t ]{#}/",
+ '/(?[ \t + \e # ネ This was supposed to be a comment ])/' =>
+    "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # ネ This was supposed to be a comment ]){#}/",
  'm/(*ネ)ネ/' => q<Unknown verb pattern 'ネ' {#} m/(*ネ){#}ネ/>,
  '/\cネ/' => "Character following \"\\c\" must be printable ASCII",
  '/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/",
diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t
index 92875677be..60a126ba3c 100644
--- a/t/re/regex_sets.t
+++ b/t/re/regex_sets.t
@@ -157,13 +157,13 @@ for my $char ("٠", "٥", "٩") {
 	eval { $_ = '/(?[(\c]) /'; qr/$_/ };
 	like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
 	eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ };
-	like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
+	like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic');
 	eval { $_ = '(?[(\c])'; qr/$_/ };
 	like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error');
 	eval { $_ = '(?[(\c]) ]\b'; qr/$_/ };
-	like($@, qr/^Syntax error/, '/(?[(\c]) ]\b/ should be a syntax error');
+	like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error');
 	eval { $_ = '(?[\c[]](])'; qr/$_/ };
-	like($@, qr/^Syntax error/, '/(?[\c[]](])/ should be a syntax error');
+	like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error');
 	like("\c#", qr/(?[\c#])/, '\c# should match itself');
 	like("\c[", qr/(?[\c[])/, '\c[ should match itself');
 	like("\c\ ", qr/(?[\c\])/, '\c\ should match itself');
-- 
2.22.0.vfs.1.1.57.gbaf16c8