From 116aac1447ee92df25599859293752648e3c6ea0 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sesse@google.com>
Date: Fri, 20 May 2022 16:10:34 +0200
Subject: [PATCH] add a trie to map quickly from address range to compilation
 unit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using perf to profile large binaries, _bfd_dwarf2_find_nearest_line()
becomes a hotspot, as perf wants to get line number information
(for inline-detection purposes) for each and every sample. In Chromium
in particular (the content_shell binary), this entails going through
475k address ranges, which takes a long time when done repeatedly.

Add a radix-256 trie over the address space to quickly map an address to
its compilation unit; for content_shell, which is 1.6 GB when some
(but not full) debug information is turned on, we go from 6 ms to
0.006 ms (6 µs) for each lookup from address to compilation unit, a 1000x
speedup.

There is a modest RAM increase of 180 MB for this binary (the existing
linked list over ranges uses about 10 MB, and the entire perf job uses
between 2 and 3 GB for a medium-size profile); for smaller binaries with
few ranges, there should be hardly any extra RAM usage at all.

Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=b43771b045fb5616da3964f2994eefbe8ae70d32]

CVE: CVE-2023-22608

Signed-off-by: Yash Shinde <Yash.Shinde@windriver.com>

---
 bfd/dwarf2.c | 326 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 312 insertions(+), 14 deletions(-)
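
For orientation, here is a minimal standalone sketch (editorial, not taken
from the patch below) of how a lookup walks the radix-256 trie described
above, assuming a 64-bit bfd_vma (modelled as uint64_t); the names
lookup_unit and unit_name are invented for the example, and the real bfd
lookup is more involved (it tracks the best of several overlapping ranges
and also scans units that have registered no ranges yet).

/* Illustrative sketch only -- not bfd code.  uint64_t stands in for
   bfd_vma; the node layout mirrors struct trie_node / trie_leaf /
   trie_interior introduced by the patch below.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VMA_BITS 64
#define TRIE_LEAF_SIZE 16

struct trie_node
{
  /* Zero for interior nodes, otherwise the leaf's capacity.  */
  unsigned int num_room_in_leaf;
};

struct trie_leaf
{
  struct trie_node head;
  unsigned int num_stored_in_leaf;
  struct
  {
    const char *unit_name;	/* Stands in for struct comp_unit *.  */
    uint64_t low_pc, high_pc;	/* Half-open range [low_pc, high_pc).  */
  } ranges[TRIE_LEAF_SIZE];
};

struct trie_interior
{
  struct trie_node head;
  struct trie_node *children[256];
};

/* Walk one byte of ADDR per interior level; at a leaf, scan its ranges
   linearly and return the unit containing ADDR, or NULL.  */
static const char *
lookup_unit (const struct trie_node *trie, uint64_t addr)
{
  unsigned int bits = 0;

  while (trie != NULL && trie->num_room_in_leaf == 0)
    {
      const struct trie_interior *interior
	= (const struct trie_interior *) trie;
      unsigned int ch = (addr >> (VMA_BITS - bits - 8)) & 0xff;

      trie = interior->children[ch];
      bits += 8;
    }

  if (trie != NULL)
    {
      const struct trie_leaf *leaf = (const struct trie_leaf *) trie;
      unsigned int i;

      for (i = 0; i < leaf->num_stored_in_leaf; ++i)
	if (addr >= leaf->ranges[i].low_pc && addr < leaf->ranges[i].high_pc)
	  return leaf->ranges[i].unit_name;
    }

  return NULL;
}

int
main (void)
{
  /* A binary with few ranges keeps a single leaf at the root.  */
  struct trie_leaf root;

  memset (&root, 0, sizeof root);
  root.head.num_room_in_leaf = TRIE_LEAF_SIZE;
  root.num_stored_in_leaf = 1;
  root.ranges[0].unit_name = "cu-main.c";
  root.ranges[0].low_pc = 0x401000;
  root.ranges[0].high_pc = 0x401200;

  const char *hit = lookup_unit (&root.head, 0x401080);
  const char *miss = lookup_unit (&root.head, 0x500000);
  printf ("0x401080 -> %s\n", hit ? hit : "(no unit)");
  printf ("0x500000 -> %s\n", miss ? miss : "(no unit)");
  return 0;
}

With no interior nodes the walk is just the leaf scan; only once a leaf
fills past TRIE_LEAF_SIZE entries does the patch split it into
byte-indexed children.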

diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c
index fdf071c3..0ae50a37 100644
--- a/bfd/dwarf2.c
+++ b/bfd/dwarf2.c
@@ -82,6 +82,77 @@ struct adjusted_section
   bfd_vma adj_vma;
 };
 
+/* A trie to map quickly from address range to compilation unit.
+
+   This is a fairly standard radix-256 trie, used to quickly locate which
+   compilation unit any given address belongs to.  Given that each compilation
+   unit may register hundreds of very small and unaligned ranges (which may
+   potentially overlap, due to inlining and other concerns), and a large
+   program may end up containing hundreds of thousands of such ranges, we cannot
+   scan through them linearly without undue slowdown.
+
+   We use a hybrid trie to avoid memory explosion: There are two types of trie
+   nodes, leaves and interior nodes.  (Almost all nodes are leaves, so they
+   take up the bulk of the memory usage.) Leaves contain a simple array of
+   ranges (high/low address) and which compilation unit contains those ranges,
+   and when we get to a leaf, we scan through it linearly.  Interior nodes
+   contain pointers to 256 other nodes, keyed by the next byte of the address.
+   So for a 64-bit address like 0x1234567abcd, we would start at the root and go
+   down child[0x00]->child[0x00]->child[0x01]->child[0x23]->child[0x45] etc.,
+   until we hit a leaf.  (Nodes are, in general, leaves until they exceed the
+   default allocation of 16 elements, at which point they are converted to
+   interior node if possible.) This gives us near-constant lookup times;
+   the only thing that can be costly is if there are lots of overlapping ranges
+   within a single 256-byte segment of the binary, in which case we have to
+   scan through them all to find the best match.
+
+   For a binary with few ranges, we will in practice only have a single leaf
+   node at the root, containing a simple array.  Thus, the scheme is efficient
+   for both small and large binaries.
+ */
+
+/* Experiments have shown 16 to be a memory-efficient default leaf size.
+   The only case where a leaf will hold more memory than this, is at the
+   bottomost level (covering 256 bytes in the binary), where we'll expand
+   the leaf to be able to hold more ranges if needed.
+ */
+#define TRIE_LEAF_SIZE 16
+
+/* All trie_node pointers will really be trie_leaf or trie_interior,
+   but they have this common head.  */
+struct trie_node
+{
+  /* If zero, we are an interior node.
+     Otherwise, how many ranges we have room for in this leaf.  */
+  unsigned int num_room_in_leaf;
+};
+
+struct trie_leaf
+{
+  struct trie_node head;
+  unsigned int num_stored_in_leaf;
+  struct {
+    struct comp_unit *unit;
+    bfd_vma low_pc, high_pc;
+  } ranges[TRIE_LEAF_SIZE];
+};
+
+struct trie_interior
+{
+  struct trie_node head;
+  struct trie_node *children[256];
+};
+
+static struct trie_node *alloc_trie_leaf (bfd *abfd)
+{
+  struct trie_leaf *leaf =
+    bfd_zalloc (abfd, sizeof (struct trie_leaf));
+  if (leaf == NULL)
+    return NULL;
+  leaf->head.num_room_in_leaf = TRIE_LEAF_SIZE;
+  return &leaf->head;
+}
+
 struct dwarf2_debug_file
 {
   /* The actual bfd from which debug info was loaded.  Might be
@@ -139,6 +210,9 @@ struct dwarf2_debug_file
   /* A list of all previously read comp_units.  */
   struct comp_unit *all_comp_units;
 
+  /* A list of all previously read comp_units with no ranges (yet).  */
+  struct comp_unit *all_comp_units_without_ranges;
+
   /* Last comp unit in list above.  */
   struct comp_unit *last_comp_unit;
 
@@ -147,6 +221,9 @@ struct dwarf2_debug_file
 
   /* Hash table to map offsets to decoded abbrevs.  */
   htab_t abbrev_offsets;
+
+  /* Root of a trie to map addresses to compilation units.  */
+  struct trie_node *trie_root;
 };
 
 struct dwarf2_debug
@@ -220,6 +297,11 @@ struct comp_unit
   /* Chain the previously read compilation units.  */
   struct comp_unit *next_unit;
 
+  /* Chain the previously read compilation units that have no ranges yet.
+     We scan these separately when we have a trie over the ranges.
+     Unused if arange.high != 0. */
+  struct comp_unit *next_unit_without_ranges;
+
   /* Likewise, chain the compilation unit read after this one.
      The comp units are stored in reversed reading order.  */
   struct comp_unit *prev_unit;
@@ -296,6 +378,10 @@ struct comp_unit
 
   /* TRUE if symbols are cached in hash table for faster lookup by name.  */
   bool cached;
+
+  /* Used when iterating over trie leaves to know which units we have
+     already seen in this iteration.  */
+  bool mark;
 };
 
 /* This data structure holds the information of an abbrev.  */
@@ -1766,9 +1852,189 @@ concat_filename (struct line_info_table *table, unsigned int file)
   return strdup (filename);
 }
 
+/* Number of bits in a bfd_vma.  */
+#define VMA_BITS (8 * sizeof (bfd_vma))
+
+/* Check whether [low1, high1) can be combined with [low2, high2),
+   i.e., they touch or overlap.  */
+static bool ranges_overlap (bfd_vma low1,
+			    bfd_vma high1,
+			    bfd_vma low2,
+			    bfd_vma high2)
+{
+  if (low1 == low2 || high1 == high2)
+    return true;
+
+  /* Sort so that low1 is below low2. */
+  if (low1 > low2)
+    {
+      bfd_vma tmp;
+
+      tmp = low1;
+      low1 = low2;
+      low2 = tmp;
+
+      tmp = high1;
+      high1 = high2;
+      high2 = tmp;
+    }
+
+  /* We touch iff low2 == high1.
+     We overlap iff low2 is within [low1, high1). */
+  return (low2 <= high1);
+}
+
+/* Insert an address range in the trie mapping addresses to compilation units.
+   Will return the new trie node (usually the same as is being sent in, but
+   in case of a leaf-to-interior conversion, or expansion of a leaf, it may be
+   different), or NULL on failure.
+ */
+static struct trie_node *insert_arange_in_trie(bfd *abfd,
+					       struct trie_node *trie,
+					       bfd_vma trie_pc,
+					       unsigned int trie_pc_bits,
+					       struct comp_unit *unit,
+					       bfd_vma low_pc,
+					       bfd_vma high_pc)
+{
+  bfd_vma clamped_low_pc, clamped_high_pc;
+  int ch, from_ch, to_ch;
+  bool is_full_leaf = false;
+
+  /* See if we can extend any of the existing ranges.  This merging
+     isn't perfect (if merging opens up the possibility of merging two existing
+     ranges, we won't find them), but it takes the majority of the cases.  */
+  if (trie->num_room_in_leaf > 0)
+    {
+      struct trie_leaf *leaf = (struct trie_leaf *) trie;
+      unsigned int i;
+
+      for (i = 0; i < leaf->num_stored_in_leaf; ++i)
+	{
+	  if (leaf->ranges[i].unit == unit &&
+	      ranges_overlap(low_pc, high_pc,
+			     leaf->ranges[i].low_pc, leaf->ranges[i].high_pc))
+	    {
+	      if (low_pc < leaf->ranges[i].low_pc)
+		leaf->ranges[i].low_pc = low_pc;
+	      if (high_pc > leaf->ranges[i].high_pc)
+		leaf->ranges[i].high_pc = high_pc;
+	      return trie;
+	    }
+	}
+
+      is_full_leaf = leaf->num_stored_in_leaf == trie->num_room_in_leaf;
+    }
+
+  /* If we're a leaf with no more room and we're _not_ at the bottom,
+     convert to an interior node.  */
+  if (is_full_leaf && trie_pc_bits < VMA_BITS)
+    {
+      const struct trie_leaf *leaf = (struct trie_leaf *) trie;
+      unsigned int i;
+
+      trie = bfd_zalloc (abfd, sizeof (struct trie_interior));
+      if (!trie)
+	return NULL;
+      is_full_leaf = false;
+
+      /* TODO: If we wanted to save a little more memory at the cost of
+	 complexity, we could have reused the old leaf node as one of the
+	 children of the new interior node, instead of throwing it away.  */
+      for (i = 0; i < leaf->num_stored_in_leaf; ++i)
+        {
+	  if (!insert_arange_in_trie (abfd, trie, trie_pc, trie_pc_bits,
+				      leaf->ranges[i].unit, leaf->ranges[i].low_pc,
+				      leaf->ranges[i].high_pc))
+	    return NULL;
+	}
+    }
+
+  /* If we're a leaf with no more room and we _are_ at the bottom,
+     we have no choice but to just make it larger. */
+  if (is_full_leaf)
+    {
+      const struct trie_leaf *leaf = (struct trie_leaf *) trie;
+      unsigned int new_room_in_leaf = trie->num_room_in_leaf * 2;
+      struct trie_leaf *new_leaf;
+
+      new_leaf = bfd_zalloc (abfd,
+	sizeof (struct trie_leaf) +
+	  (new_room_in_leaf - TRIE_LEAF_SIZE) * sizeof (leaf->ranges[0]));
+      new_leaf->head.num_room_in_leaf = new_room_in_leaf;
+      new_leaf->num_stored_in_leaf = leaf->num_stored_in_leaf;
+
+      memcpy (new_leaf->ranges,
+	      leaf->ranges,
+	      leaf->num_stored_in_leaf * sizeof (leaf->ranges[0]));
+      trie = &new_leaf->head;
+      is_full_leaf = false;
+
+      /* Now the insert below will go through.  */
+    }
+
+  /* If we're a leaf (now with room), we can just insert at the end.  */
+  if (trie->num_room_in_leaf > 0)
+    {
+      struct trie_leaf *leaf = (struct trie_leaf *) trie;
+
+      unsigned int i = leaf->num_stored_in_leaf++;
+      leaf->ranges[i].unit = unit;
+      leaf->ranges[i].low_pc = low_pc;
+      leaf->ranges[i].high_pc = high_pc;
+      return trie;
+    }
+
+  /* Now we are definitely an interior node, so recurse into all
+     the relevant buckets.  */
+
+  /* Clamp the range to the current trie bucket.  */
+  clamped_low_pc = low_pc;
+  clamped_high_pc = high_pc;
+  if (trie_pc_bits > 0)
+    {
+      bfd_vma bucket_high_pc =
+	trie_pc + ((bfd_vma)-1 >> trie_pc_bits);  /* Inclusive.  */
+      if (clamped_low_pc < trie_pc)
+	clamped_low_pc = trie_pc;
+      if (clamped_high_pc > bucket_high_pc)
+	clamped_high_pc = bucket_high_pc;
+    }
+
+  /* Insert the ranges in all buckets that it spans.  */
+  from_ch = (clamped_low_pc >> (VMA_BITS - trie_pc_bits - 8)) & 0xff;
+  to_ch = ((clamped_high_pc - 1) >> (VMA_BITS - trie_pc_bits - 8)) & 0xff;
+  for (ch = from_ch; ch <= to_ch; ++ch)
+    {
+      struct trie_interior *interior = (struct trie_interior *) trie;
+      struct trie_node *child = interior->children[ch];
+
+      if (child == NULL)
+        {
+	  child = alloc_trie_leaf (abfd);
+	  if (!child)
+	    return NULL;
+	}
+      child = insert_arange_in_trie (abfd,
+				     child,
+				     trie_pc + ((bfd_vma)ch << (VMA_BITS - trie_pc_bits - 8)),
+				     trie_pc_bits + 8,
+				     unit,
+				     low_pc,
+				     high_pc);
+      if (!child)
+	return NULL;
+
+      interior->children[ch] = child;
+    }
+
+    return trie;
+}
+
+
 static bool
-arange_add (const struct comp_unit *unit, struct arange *first_arange,
-	    bfd_vma low_pc, bfd_vma high_pc)
+arange_add (struct comp_unit *unit, struct arange *first_arange,
+	    struct trie_node **trie_root, bfd_vma low_pc, bfd_vma high_pc)
 {
   struct arange *arange;
 
@@ -1776,6 +2042,19 @@ arange_add (const struct comp_unit *unit, struct arange *first_arange,
   if (low_pc == high_pc)
     return true;
 
+  if (trie_root != NULL)
+    {
+      *trie_root = insert_arange_in_trie (unit->file->bfd_ptr,
+					  *trie_root,
+					  0,
+					  0,
+					  unit,
+					  low_pc,
+					  high_pc);
+      if (*trie_root == NULL)
+	return false;
+    }
+
   /* If the first arange is empty, use it.  */
   if (first_arange->high == 0)
     {
@@ -2410,7 +2689,8 @@ decode_line_info (struct comp_unit *unit)
 		    low_pc = address;
 		  if (address > high_pc)
 		    high_pc = address;
-		  if (!arange_add (unit, &unit->arange, low_pc, high_pc))
+		  if (!arange_add (unit, &unit->arange, &unit->file->trie_root,
+				   low_pc, high_pc))
 		    goto line_fail;
 		  break;
 		case DW_LNE_set_address:
@@ -3134,7 +3414,7 @@ find_abstract_instance (struct comp_unit *unit,
 
 static bool
 read_ranges (struct comp_unit *unit, struct arange *arange,
-	     bfd_uint64_t offset)
+	     struct trie_node **trie_root, bfd_uint64_t offset)
 {
   bfd_byte *ranges_ptr;
   bfd_byte *ranges_end;
@@ -3169,7 +3449,7 @@ read_ranges (struct comp_unit *unit, struct arange *arange,
 	base_address = high_pc;
       else
 	{
-	  if (!arange_add (unit, arange,
+	  if (!arange_add (unit, arange, trie_root,
 			   base_address + low_pc, base_address + high_pc))
 	    return false;
 	}
@@ -3179,7 +3459,7 @@ read_ranges (struct comp_unit *unit, struct arange *arange,
 
 static bool
 read_rnglists (struct comp_unit *unit, struct arange *arange,
-	       bfd_uint64_t offset)
+	       struct trie_node **trie_root, bfd_uint64_t offset)
 {
   bfd_byte *rngs_ptr;
   bfd_byte *rngs_end;
@@ -3253,19 +3533,19 @@ read_rnglists (struct comp_unit *unit, struct arange *arange,
 	  return false;
 	}
 
-      if (!arange_add (unit, arange, low_pc, high_pc))
+      if (!arange_add (unit, arange, trie_root, low_pc, high_pc))
 	return false;
     }
 }
 
 static bool
 read_rangelist (struct comp_unit *unit, struct arange *arange,
-		bfd_uint64_t offset)
+		struct trie_node **trie_root, bfd_uint64_t offset)
 {
   if (unit->version <= 4)
-    return read_ranges (unit, arange, offset);
+    return read_ranges (unit, arange, trie_root, offset);
   else
-    return read_rnglists (unit, arange, offset);
+    return read_rnglists (unit, arange, trie_root, offset);
 }
 
 static struct funcinfo *
@@ -3563,7 +3843,8 @@ scan_unit_for_symbols (struct comp_unit *unit)
 
 		case DW_AT_ranges:
 		  if (is_int_form (&attr)
-		      && !read_rangelist (unit, &func->arange, attr.u.val))
+		      && !read_rangelist (unit, &func->arange,
+					  &unit->file->trie_root, attr.u.val))
 		    goto fail;
 		  break;
 
@@ -3679,7 +3960,8 @@ scan_unit_for_symbols (struct comp_unit *unit)
 
       if (func && high_pc != 0)
 	{
-	  if (!arange_add (unit, &func->arange, low_pc, high_pc))
+	  if (!arange_add (unit, &func->arange, &unit->file->trie_root,
+			   low_pc, high_pc))
 	    goto fail;
 	}
     }
@@ -3874,7 +4156,8 @@ parse_comp_unit (struct dwarf2_debug *stash,
 
 	case DW_AT_ranges:
 	  if (is_int_form (&attr)
-	      && !read_rangelist (unit, &unit->arange, attr.u.val))
+	      && !read_rangelist (unit, &unit->arange,
+				  &unit->file->trie_root, attr.u.val))
 	    return NULL;
 	  break;
 
@@ -3916,7 +4199,8 @@ parse_comp_unit (struct dwarf2_debug *stash,
     high_pc += low_pc;
   if (high_pc != 0)
     {
-      if (!arange_add (unit, &unit->arange, low_pc, high_pc))
+      if (!arange_add (unit, &unit->arange, &unit->file->trie_root,
+		       low_pc, high_pc))
 	return NULL;
     }
 
@@ -4747,6 +5031,14 @@ _bfd_dwarf2_slurp_debug_info (bfd *abfd, bfd *debug_bfd,
   if (!stash->alt.abbrev_offsets)
     return false;
 
+  stash->f.trie_root = alloc_trie_leaf (abfd);
+  if (!stash->f.trie_root)
+    return false;
+
+  stash->alt.trie_root = alloc_trie_leaf (abfd);
+  if (!stash->alt.trie_root)
+    return false;
+
   *pinfo = stash;
 
   if (debug_bfd == NULL)
@@ -4918,6 +5210,12 @@ stash_comp_unit (struct dwarf2_debug *stash, struct dwarf2_debug_file *file)
 	  each->next_unit = file->all_comp_units;
 	  file->all_comp_units = each;
 
+	  if (each->arange.high == 0)
+	    {
+	      each->next_unit_without_ranges = file->all_comp_units_without_ranges;
+	      file->all_comp_units_without_ranges = each->next_unit_without_ranges;
+	    }
+
 	  file->info_ptr += length;
 	  return each;
 	}
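
Editorial postscript, not part of the patch: a small standalone check of
the bucket arithmetic used by insert_arange_in_trie above, again assuming
a 64-bit bfd_vma; the addresses and the helper name show_buckets are
invented for the example.  It reproduces the clamping of a range to the
current node's span and the from_ch/to_ch selection of the byte-indexed
children the range fans out to.

/* Illustrative sketch only -- not bfd code.  Mirrors the clamping and
   child-bucket selection from insert_arange_in_trie, with uint64_t
   standing in for bfd_vma.  */
#include <stdint.h>
#include <stdio.h>

#define VMA_BITS 64

static void
show_buckets (uint64_t trie_pc, unsigned int trie_pc_bits,
	      uint64_t low_pc, uint64_t high_pc)
{
  uint64_t clamped_low_pc = low_pc;
  uint64_t clamped_high_pc = high_pc;

  /* Clamp the half-open range [low_pc, high_pc) to the span covered by
     this node, exactly as the patch does.  */
  if (trie_pc_bits > 0)
    {
      uint64_t bucket_high_pc
	= trie_pc + ((uint64_t) -1 >> trie_pc_bits);	/* Inclusive.  */

      if (clamped_low_pc < trie_pc)
	clamped_low_pc = trie_pc;
      if (clamped_high_pc > bucket_high_pc)
	clamped_high_pc = bucket_high_pc;
    }

  /* Which byte-indexed children does the clamped range span?  */
  unsigned int from_ch
    = (clamped_low_pc >> (VMA_BITS - trie_pc_bits - 8)) & 0xff;
  unsigned int to_ch
    = ((clamped_high_pc - 1) >> (VMA_BITS - trie_pc_bits - 8)) & 0xff;

  printf ("trie_pc_bits=%2u: children 0x%02x..0x%02x\n",
	  trie_pc_bits, from_ch, to_ch);
}

int
main (void)
{
  /* At the root (trie_pc_bits == 0) a typical user-space range sits
     entirely inside child[0x00].  */
  show_buckets (0, 0, 0x7f0000401000, 0x7f0000401200);

  /* Six levels down (48 address bits consumed, each child covering 256
     bytes) the same 0x200-byte range fans out over children 0x10..0x11.  */
  show_buckets (0x7f0000400000, 48, 0x7f0000401000, 0x7f0000401200);
  return 0;
}

At the root every realistic address lands in child[0x00], which is why a
small binary never grows beyond a single root leaf unless it registers
more than TRIE_LEAF_SIZE ranges.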