1 files changed, 347 insertions, 0 deletions
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
new file mode 100644
index 0000000000..b7dcaefad3
--- /dev/null
+++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
@@ -0,0 +1,347 @@
+From fd634998f813340768c333cdad638498602856e5 Mon Sep 17 00:00:00 2001
+From: Ulya Trofimovich <skvadrik@gmail.com>
+Date: Tue, 21 Apr 2020 21:28:32 +0100
+Subject: [PATCH] Rewrite recursion into iteration (Tarjan's SCC algorithm and
+ YYFILL states).
+
+This is to avoid stack overflow on large RE (especially on instrumented
+builds that have larger stack frames, like AddressSanitizer).
+
+Stack overflow reported by Agostino Sarubbo.
+Related to #219 "overflow-1.re test fails on system with small stack".
+
+Upstream-Status: Backport
+[https://github.com/skvadrik/re2c/commit/fd634998f813340768c333cdad638498602856e5]
+
+CVE: CVE-2018-21232
+
+Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
+---
+diff --git a/src/dfa/fillpoints.cc b/src/dfa/fillpoints.cc
+--- a/src/dfa/fillpoints.cc	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
++++ b/src/dfa/fillpoints.cc	(date 1646929180243)
+@@ -5,151 +5,186 @@
+ 
+ #include "src/dfa/dfa.h"
+ 
+-namespace re2c
+-{
++
++/*
++ * note [finding strongly connected components of DFA]
++ *
++ * A slight modification of Tarjan's algorithm.
++ *
++ * The algorithm traverses the DFA in depth-first order. It maintains a stack
++ * of states that have already been visited but haven't been assigned to an SCC
++ * yet. For each state the algorithm calculates 'lowlink': index of the highest
++ * ancestor state reachable in one step from a descendant of this state.
++ * Lowlink is used to determine when a set of states should be popped off stack
++ * into a new SCC.
++ *
++ * We use lowlink to hold different kinds of information:
++ *   - values in range [0 .. stack size] mean that the state is on stack (a
++ *     link to a state with the smallest index reachable from this one)
++ *   - SCC_UND means that this state has not been visited yet
++ *   - SCC_INF means that this state has already been popped off stack
++ *
++ * We use stack size (rather than topological sort index) as a unique index of
++ * the state on stack. This is safe because the indices of states on stack are
++ * unique and less than the indices of states that have been popped off stack
++ * (SCC_INF).
++ */
++
++namespace re2c {
++    namespace {
+ 
+-static const size_t SCC_INF = std::numeric_limits<size_t>::max();
+-static const size_t SCC_UND = SCC_INF - 1;
++        static const size_t SCC_INF = std::numeric_limits<size_t>::max();
++        static const size_t SCC_UND = SCC_INF - 1;
+ 
+-static bool loopback(size_t node, size_t narcs, const size_t *arcs)
+-{
+-	for (size_t i = 0; i < narcs; ++i)
+-	{
+-		if (arcs[i] == node)
+-		{
+-			return true;
+-		}
+-	}
+-	return false;
+-}
++        static bool loopback(size_t state, size_t narcs, const size_t *arcs)
++        {
++            for (size_t i = 0; i < narcs; ++i) {
++                if (arcs[i] == state) return true;
++            }
++            return false;
++        }
+ 
+-/*
+- * node [finding strongly connected components of DFA]
+- *
+- * A slight modification of Tarjan's algorithm.
+- *
+- * The algorithm walks graph in deep-first order. It maintains a stack
+- * of nodes that have already been visited but haven't been assigned to
+- * SCC yet. For each node the algorithm calculates 'lowlink': index of
+- * the highest ancestor node reachable in one step from a descendant of
+- * the node. Lowlink is used to determine when a set of nodes should be
+- * popped off the stack into a new SCC.
+- *
+- * We use lowlink to hold different kinds of information:
+- *   - values in range [0 .. stack size] mean that this node is on stack
+- *     (link to a node with the smallest index reachable from this one)
+- *   - SCC_UND means that this node has not been visited yet
+- *   - SCC_INF means that this node has already been popped off stack
+- *
+- * We use stack size (rather than topological sort index) as unique index
+- * of a node on stack. This is safe because indices of nodes on stack are
+- * still unique and less than indices of nodes that have been popped off
+- * stack (SCC_INF).
+- *
+- */
+-static void scc(
+-	const dfa_t &dfa,
+-	std::stack<size_t> &stack,
+-	std::vector<size_t> &lowlink,
+-	std::vector<bool> &trivial,
+-	size_t i)
+-{
+-	const size_t link = stack.size();
+-	lowlink[i] = link;
+-	stack.push(i);
++        struct StackItem {
++            size_t state;  // current state
++            size_t symbol; // next arc to be visited in this state
++            size_t link;   // Tarjan's "lowlink"
++        };
++
++// Tarjan's algorithm
++        static void scc(const dfa_t &dfa, std::vector<bool> &trivial,
++                        std::vector<StackItem> &stack_dfs)
++        {
++            std::vector<size_t> lowlink(dfa.states.size(), SCC_UND);
++            std::stack<size_t> stack;
++
++            StackItem x0 = {0, 0, 0};
++            stack_dfs.push_back(x0);
++
++            while (!stack_dfs.empty()) {
++                const size_t i = stack_dfs.back().state;
++                size_t c = stack_dfs.back().symbol;
++                size_t link = stack_dfs.back().link;
++                stack_dfs.pop_back();
++
++                const size_t *arcs = dfa.states[i]->arcs;
++
++                if (c == 0) {
++                    // DFS recursive enter
++                    //DASSERT(lowlink[i] == SCC_UND);
++                    link = lowlink[i] = stack.size();
++                    stack.push(i);
++                }
++                else {
++                    // DFS recursive return (from one of successor states)
++                    const size_t j = arcs[c - 1];
++                    //DASSERT(lowlink[j] != SCC_UND);
++                    lowlink[i] = std::min(lowlink[i], lowlink[j]);
++                }
+ 
+-	const size_t *arcs = dfa.states[i]->arcs;
+-	for (size_t c = 0; c < dfa.nchars; ++c)
+-	{
+-		const size_t j = arcs[c];
+-		if (j != dfa_t::NIL)
+-		{
+-			if (lowlink[j] == SCC_UND)
+-			{
+-				scc(dfa, stack, lowlink, trivial, j);
+-			}
+-			if (lowlink[j] < lowlink[i])
+-			{
+-				lowlink[i] = lowlink[j];
+-			}
+-		}
+-	}
++                // find the next successor state that hasn't been visited yet
++                for (; c < dfa.nchars; ++c) {
++                    const size_t j = arcs[c];
++                    if (j != dfa_t::NIL) {
++                        if (lowlink[j] == SCC_UND) {
++                            break;
++                        }
++                        lowlink[i] = std::min(lowlink[i], lowlink[j]);
++                    }
++                }
+ 
+-	if (lowlink[i] == link)
+-	{
+-		// SCC is non-trivial (has loops) iff it either:
+-		//   - consists of multiple nodes (they all must be interconnected)
+-		//   - consists of single node which loops back to itself
+-		trivial[i] = i == stack.top()
+-			&& !loopback(i, dfa.nchars, arcs);
++                if (c < dfa.nchars) {
++                    // recurse into the next successor state
++                    StackItem x1 = {i, c + 1, link};
++                    stack_dfs.push_back(x1);
++                    StackItem x2 = {arcs[c], 0, SCC_UND};
++                    stack_dfs.push_back(x2);
++                }
++                else if (lowlink[i] == link) {
++                    // all successors have been visited
++                    // SCC is non-trivial (has loops) if either:
++                    //   - it contains multiple interconnected states
++                    //   - it contains a single self-looping state
++                    trivial[i] = i == stack.top() && !loopback(i, dfa.nchars, arcs);
+ 
+-		size_t j;
+-		do
+-		{
+-			j = stack.top();
+-			stack.pop();
+-			lowlink[j] = SCC_INF;
+-		}
+-		while (j != i);
+-	}
+-}
++                    for (;;) {
++                        const size_t j = stack.top();
++                        stack.pop();
++                        lowlink[j] = SCC_INF;
++                        if (i == j) break;
++                    }
++                }
++            }
++        }
+ 
+-static void calc_fill(
+-	const dfa_t &dfa,
+-	const std::vector<bool> &trivial,
+-	std::vector<size_t> &fill,
+-	size_t i)
+-{
+-	if (fill[i] == SCC_UND)
+-	{
+-		fill[i] = 0;
+-		const size_t *arcs = dfa.states[i]->arcs;
+-		for (size_t c = 0; c < dfa.nchars; ++c)
+-		{
+-			const size_t j = arcs[c];
+-			if (j != dfa_t::NIL)
+-			{
+-				calc_fill(dfa, trivial, fill, j);
+-				size_t max = 1;
+-				if (trivial[j])
+-				{
+-					max += fill[j];
+-				}
+-				if (max > fill[i])
+-				{
+-					fill[i] = max;
+-				}
+-			}
+-		}
+-	}
+-}
+-
+-void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
+-{
+-	const size_t size = dfa.states.size();
+-
+-	// find DFA states that belong to non-trivial SCC
+-	std::stack<size_t> stack;
+-	std::vector<size_t> lowlink(size, SCC_UND);
+-	std::vector<bool> trivial(size, false);
+-	scc(dfa, stack, lowlink, trivial, 0);
+-
+-	// for each DFA state, calculate YYFILL argument:
+-	// maximal path length to the next YYFILL state
+-	fill.resize(size, SCC_UND);
+-	calc_fill(dfa, trivial, fill, 0);
++        static void calc_fill(const dfa_t &dfa, const std::vector<bool> &trivial,
++                              std::vector<StackItem> &stack_dfs, std::vector<size_t> &fill)
++        {
++            const size_t nstates = dfa.states.size();
++            fill.resize(nstates, SCC_UND);
++
++            StackItem x0 = {0, 0, SCC_INF};
++            stack_dfs.push_back(x0);
++
++            while (!stack_dfs.empty()) {
++                const size_t i = stack_dfs.back().state;
++                size_t c = stack_dfs.back().symbol;
++                stack_dfs.pop_back();
++
++                const size_t *arcs = dfa.states[i]->arcs;
++
++                if (c == 0) {
++                    // DFS recursive enter
++                    if (fill[i] != SCC_UND) continue;
++                    fill[i] = 0;
++                }
++                else {
++                    // DFS recursive return (from one of successor states)
++                    const size_t j = arcs[c - 1];
++                    //DASSERT(fill[i] != SCC_UND && fill[j] != SCC_UND);
++                    fill[i] = std::max(fill[i], 1 + (trivial[j] ? fill[j] : 0));
++                }
++
++                // find the next successor state that hasn't been visited yet
++                for (; c < dfa.nchars; ++c) {
++                    const size_t j = arcs[c];
++                    if (j != dfa_t::NIL) break;
++                }
++
++                if (c < dfa.nchars) {
++                    // recurse into the next successor state
++                    StackItem x1 = {i, c + 1, SCC_INF};
++                    stack_dfs.push_back(x1);
++                    StackItem x2 = {arcs[c], 0, SCC_INF};
++                    stack_dfs.push_back(x2);
++                }
++            }
+ 
+-	// The following states must trigger YYFILL:
+-	//   - inital state
+-	//   - all states in non-trivial SCCs
+-	// for other states, reset YYFILL argument to zero
+-	for (size_t i = 1; i < size; ++i)
+-	{
+-		if (trivial[i])
+-		{
+-			fill[i] = 0;
+-		}
+-	}
+-}
++            // The following states must trigger YYFILL:
++            //   - initial state
++            //   - all states in non-trivial SCCs
++            // for other states, reset YYFILL argument to zero
++            for (size_t i = 1; i < nstates; ++i) {
++                if (trivial[i]) {
++                    fill[i] = 0;
++                }
++            }
++        }
+ 
++    } // anonymous namespace
++
++    void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
++    {
++        const size_t nstates = dfa.states.size();
++        std::vector<bool> trivial(nstates, false);
++        std::vector<StackItem> stack_dfs;
++        stack_dfs.reserve(nstates);
++
++        // find DFA states that belong to non-trivial SCC
++        scc(dfa, trivial, stack_dfs);
++
++        // for each DFA state, calculate YYFILL argument:
++        // maximal path length to the next YYFILL state
++        calc_fill(dfa, trivial, stack_dfs, fill);
++    }
++
+ } // namespace re2c