summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/perl/files/CVE-2020-12723.patch
blob: ad195cffab0441a3a281e63d5ff09e55b6ffae20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
From da9ec461e22915ccabb06785bf39ec34577ada12 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden <hv@crypt.org>
Date: Sat, 11 Apr 2020 14:10:24 +0100
Subject: [PATCH] study_chunk: avoid mutating regexp program within GOSUB

gh16947 and gh17743: studying GOSUB may restudy in an inner call
(via a mix of recursion and enframing) something that an outer call
is in the middle of looking at.  Let the outer frame deal with it.

(CVE-2020-12723)

(cherry picked from commit c4033e740bd18d9fbe3456a9db2ec2053cdc5271)

Upstream-Status: Backport [https://github.com/perl/perl5/commit/66bbb51b93253a3f87d11c2695cfb7bdb782184a]
CVE: CVE-2020-12723
Signed-off-by: Chee Yang Lee <chee.yang.lee@intel.com>

---
 embed.fnc  |  2 +-
 embed.h    |  2 +-
 proto.h    |  2 +-
 regcomp.c  | 54 +++++++++++++++++++++++++++++++++++-------------------
 t/re/pat.t | 26 +++++++++++++++++++++++++-
 5 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index f45c127..eff4a50 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2480,7 +2480,7 @@ Es	|SSize_t|study_chunk	|NN RExC_state_t *pRExC_state \
 				|NULLOK struct scan_data_t *data \
                                 |I32 stopparen|U32 recursed_depth \
 				|NULLOK regnode_ssc *and_withp \
-				|U32 flags|U32 depth
+				|U32 flags|U32 depth|bool was_mutate_ok
 Es	|void	|rck_elide_nothing|NN regnode *node
 EsR	|SV *	|get_ANYOFM_contents|NN const regnode * n
 EsRn	|U32	|add_data	|NN RExC_state_t* const pRExC_state \
diff --git a/embed.h b/embed.h
index 356a8b9..5346ec5 100644
--- a/embed.h
+++ b/embed.h
@@ -1239,7 +1239,7 @@
 #define ssc_is_cp_posixl_init	S_ssc_is_cp_posixl_init
 #define ssc_or(a,b,c)		S_ssc_or(aTHX_ a,b,c)
 #define ssc_union(a,b,c)	S_ssc_union(aTHX_ a,b,c)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k)	S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l)	S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
 #define _invlist_dump(a,b,c,d)	Perl__invlist_dump(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 91530b1..1bda01f 100644
--- a/proto.h
+++ b/proto.h
@@ -5655,7 +5655,7 @@ PERL_STATIC_INLINE void	S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, c
 #define PERL_ARGS_ASSERT_SSC_UNION	\
 	assert(ssc); assert(invlist)
 #endif
-STATIC SSize_t	S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
+STATIC SSize_t	S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
 #define PERL_ARGS_ASSERT_STUDY_CHUNK	\
 	assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
 #endif
diff --git a/regcomp.c b/regcomp.c
index 5a9adee..8d7df1f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -106,6 +106,7 @@ typedef struct scan_frame {
     regnode *next_regnode;      /* next node to process when last is reached */
     U32 prev_recursed_depth;
     I32 stopparen;              /* what stopparen do we use */
+    bool in_gosub;              /* this or an outer frame is for GOSUB */
 
     struct scan_frame *this_prev_frame; /* this previous frame */
     struct scan_frame *prev_frame;      /* previous frame */
@@ -4466,7 +4467,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 			I32 stopparen,
                         U32 recursed_depth,
 			regnode_ssc *and_withp,
-			U32 flags, U32 depth)
+			U32 flags, U32 depth, bool was_mutate_ok)
 			/* scanp: Start here (read-write). */
 			/* deltap: Write maxlen-minlen here. */
 			/* last: Stop before this one. */
@@ -4545,6 +4546,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                    node length to get a real minimum (because
                                    the folded version may be shorter) */
 	bool unfolded_multi_char = FALSE;
+        /* avoid mutating ops if we are anywhere within the recursed or
+         * enframed handling for a GOSUB: the outermost level will handle it.
+         */
+        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
 	/* Peephole optimizer: */
         DEBUG_STUDYDATA("Peep", data, depth, is_inf);
         DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4555,7 +4560,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
          * parsing code, as each (?:..) is handled by a different invocation of
          * reg() -- Yves
          */
-        JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
+        if (mutate_ok)
+            JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
 
         /* Follow the next-chain of the current node and optimize
            away all the NOTHINGs from it.
@@ -4587,7 +4593,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             /* DEFINEP study_chunk() recursion */
             (void)study_chunk(pRExC_state, &scan, &minlen,
                               &deltanext, next, &data_fake, stopparen,
-                              recursed_depth, NULL, f, depth+1);
+                              recursed_depth, NULL, f, depth+1, mutate_ok);
 
             scan = next;
         } else
@@ -4655,7 +4661,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* recurse study_chunk() for each BRANCH in an alternation */
 		    minnext = study_chunk(pRExC_state, &scan, minlenp,
                                       &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
 
 		    if (min1 > minnext)
 			min1 = minnext;
@@ -4722,9 +4729,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		    }
 		}
 
-                if (PERL_ENABLE_TRIE_OPTIMISATION &&
-                        OP( startbranch ) == BRANCH )
-                {
+                if (PERL_ENABLE_TRIE_OPTIMISATION
+                    && OP(startbranch) == BRANCH
+                    && mutate_ok
+                ) {
 		/* demq.
 
                    Assuming this was/is a branch we are dealing with: 'scan'
@@ -5179,6 +5187,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 newframe->stopparen = stopparen;
                 newframe->prev_recursed_depth = recursed_depth;
                 newframe->this_prev_frame= frame;
+                newframe->in_gosub = (
+                    (frame && frame->in_gosub) || OP(scan) == GOSUB
+                );
 
                 DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
                 DEBUG_PEEP("fnew", scan, depth, flags);
@@ -5336,7 +5347,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                 /* This temporary node can now be turned into EXACTFU, and
                  * must, as regexec.c doesn't handle it */
-                if (OP(next) == EXACTFU_S_EDGE) {
+                if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
                     OP(next) = EXACTFU;
                 }
 
@@ -5344,8 +5355,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     &&   isALPHA_A(* STRING(next))
                     && (         OP(next) == EXACTFAA
                         || (     OP(next) == EXACTFU
-                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
-                {
+                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
+                    &&   mutate_ok
+                ) {
                     /* These differ in just one bit */
                     U8 mask = ~ ('A' ^ 'a');
 
@@ -5432,7 +5444,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   (mincount == 0
                                    ? (f & ~SCF_DO_SUBSTR)
                                    : f)
-                                  ,depth+1);
+                                  , depth+1, mutate_ok);
 
 		if (flags & SCF_DO_STCLASS)
 		    data->start_class = oclass;
@@ -5498,7 +5510,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		if (  OP(oscan) == CURLYX && data
 		      && data->flags & SF_IN_PAR
 		      && !(data->flags & SF_HAS_EVAL)
-		      && !deltanext && minnext == 1 ) {
+		      && !deltanext && minnext == 1
+                      && mutate_ok
+                ) {
 		    /* Try to optimize to CURLYN.  */
 		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
 		    regnode * const nxt1 = nxt;
@@ -5548,10 +5562,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		      && !(data->flags & SF_HAS_EVAL)
 		      && !deltanext	/* atom is fixed width */
 		      && minnext != 0	/* CURLYM can't handle zero width */
-
                          /* Nor characters whose fold at run-time may be
                           * multi-character */
                       && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
+                      && mutate_ok
 		) {
 		    /* XXXX How to optimize if data == 0? */
 		    /* Optimize to a simpler form.  */
@@ -5604,7 +5618,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* recurse study_chunk() on optimised CURLYX => CURLYM */
 			study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                     NULL, stopparen, recursed_depth, NULL, 0,
-                                    depth+1);
+                                    depth+1, mutate_ok);
 		    }
 		    else
 			oscan->flags = 0;
@@ -6009,7 +6023,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 /* recurse study_chunk() for lookahead body */
                 minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                       last, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
                 if (scan->flags) {
                     if (   deltanext < 0
                         || deltanext > (I32) U8_MAX
@@ -6114,7 +6129,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                         &deltanext, last, &data_fake,
                                         stopparen, recursed_depth, NULL,
-                                        f, depth+1);
+                                        f, depth+1, mutate_ok);
                 if (scan->flags) {
                     assert(0);  /* This code has never been tested since this
                                    is normally not compiled */
@@ -6282,7 +6297,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                         /* optimise study_chunk() for TRIE */
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
                             &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f, depth+1);
+                            stopparen, recursed_depth, NULL, f, depth+1,
+                            mutate_ok);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode*)nextbranch);
@@ -8075,7 +8091,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             &data, -1, 0, NULL,
             SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
                           | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
-            0);
+            0, TRUE);
 
 
         CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -8204,7 +8220,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
                                                       ? SCF_TRIE_DOING_RESTUDY
                                                       : 0),
-            0);
+            0, TRUE);
 
         CHECK_RESTUDY_GOTO_butfirst(NOOP);
 
diff --git a/t/re/pat.t b/t/re/pat.t
index 6a868f4..2869b58 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -25,7 +25,7 @@ BEGIN {
 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
 skip_all_without_unicode_tables();
 
-plan tests => 864;  # Update this when adding/deleting tests.
+plan tests => 873;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -2115,6 +2115,30 @@ x{0c!}\;\;îçÿ /0f/!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q
         like(runperl(prog => "$s", stderr => 1), qr/Unmatched \(/);
    }
 
+    # gh16947: test regexp corruption (GOSUB)
+    {
+        fresh_perl_is(q{
+            'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
+        }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
+    }
+    # gh16947: test fix doesn't break SUSPEND
+    {
+        fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
+                'ok', {}, "gh16947: test fix doesn't break SUSPEND");
+    }
+
+    # gh17743: more regexp corruption via GOSUB
+    {
+        fresh_perl_is(q{
+            "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
+        }, 'ok', {}, 'gh17743: test regexp corruption (1)');
+
+        fresh_perl_is(q{
+            "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
+            print "ok"
+        }, 'ok', {}, 'gh17743: test regexp corruption (2)');
+    }
+
 } # End of sub run_tests
 
 1;