From 35418a73f7c9cefebe392b1ea0f012fccaf89801 Mon Sep 17 00:00:00 2001
From: Guy Harris <gharris@sonic.net>
Date: Wed, 19 Aug 2020 23:58:20 -0700
Subject: [PATCH] Add format_text_string(), which gets the length with
 strlen().

format_text(alloc, string, strlen(string)) is a common idiom; provide
format_text_string(), which does the strlen(string) for you.  (Any
string used in a %s to set the text of a protocol tree item, if it was
directly extracted from the packet, should be run through a format_text
routine, to ensure that it's valid UTF-8 and that control characters are
handled correctly.)

Update comments while we're at it.

Change-Id: Ia8549efa1c96510ffce97178ed4ff7be4b02eb6e
Reviewed-on: https://code.wireshark.org/review/38202
Petri-Dish: Guy Harris <gharris@sonic.net>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <gharris@sonic.net>

Upstream-Status: Backport [https://gitlab.com/wireshark/wireshark/-/commit/35418a73f7c9cefebe392b1ea0f012fccaf89801]
Comment: to backport fix for CVE-2023-0667, add function format_text_string().
Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
---
 epan/strutil.c | 33 ++++++++++++++++++++++++++++----
 epan/strutil.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/epan/strutil.c b/epan/strutil.c
index 347a173..bc3b19e 100644
--- a/epan/strutil.c
+++ b/epan/strutil.c
@@ -193,10 +193,11 @@ get_token_len(const guchar *linep, const guchar *lineend,
 #define UNPOOP 0x1F4A9
 
 /*
- * Given a string, expected to be in UTF-8 but possibly containing
- * invalid sequences (as it may have come from packet data), generate
- * a valid UTF-8 string from it, allocated with the specified wmem
- * allocator, that:
+ * Given a wmem scope, a not-necessarily-null-terminated string,
+ * expected to be in UTF-8 but possibly containing invalid sequences
+ * (as it may have come from packet data), and the length of the string,
+ * generate a valid UTF-8 string from it, allocated in the specified
+ * wmem scope, that:
  *
  *   shows printable Unicode characters as themselves;
  *
@@ -493,6 +494,30 @@ format_text(wmem_allocator_t* allocator, const guchar *string, size_t len)
     return fmtbuf;
 }
 
+/** Given a wmem scope and a null-terminated string, expected to be in
+ *  UTF-8 but possibly containing invalid sequences (as it may have come
+ *  from packet data), generate a valid UTF-8 string from it (its length
+ *  is taken with strlen()), allocated in the specified wmem scope, that:
+ *
+ *   shows printable Unicode characters as themselves;
+ *
+ *   shows non-printable ASCII characters as C-style escapes (octal
+ *   if not one of the standard ones such as LF -> '\n');
+ *
+ *   shows non-printable Unicode-but-not-ASCII characters as
+ *   their universal character names;
+ *
+ *   shows illegal UTF-8 sequences as a sequence of bytes represented
+ *   as C-style hex escapes;
+ *
+ *  and return a pointer to it.
+ */
+gchar *
+format_text_string(wmem_allocator_t* allocator, const guchar *string)
+{
+	return format_text(allocator, string, strlen((const char *)string));
+}
+
 /*
  * Given a string, generate a string from it that shows non-printable
  * characters as C-style escapes except a whitespace character
diff --git a/epan/strutil.h b/epan/strutil.h
index 2046cb0..705beb5 100644
--- a/epan/strutil.h
+++ b/epan/strutil.h
@@ -46,18 +46,61 @@ WS_DLL_PUBLIC
 int        get_token_len(const guchar *linep, const guchar *lineend,
     const guchar **next_token);
 
-/** Given a string, generate a string from it that shows non-printable
- *  characters as C-style escapes, and return a pointer to it.
+/** Given a wmem scope, a not-necessarily-null-terminated string,
+ *  expected to be in UTF-8 but possibly containing invalid sequences
+ *  (as it may have come from packet data), and the length of the string,
+ *  generate a valid UTF-8 string from it, allocated in the specified
+ *  wmem scope, that:
+ *
+ *   shows printable Unicode characters as themselves;
+ *
+ *   shows non-printable ASCII characters as C-style escapes (octal
+ *   if not one of the standard ones such as LF -> '\n');
+ *
+ *   shows non-printable Unicode-but-not-ASCII characters as
+ *   their universal character names;
+ *
+ *   shows illegal UTF-8 sequences as a sequence of bytes represented
+ *   as C-style hex escapes;
+ *
+ *  and return a pointer to it.
  *
  * @param allocator The wmem scope
- * @param line A pointer to the input string
+ * @param string A pointer to the input string
  * @param len The length of the input string
  * @return A pointer to the formatted string
  *
  * @see tvb_format_text()
  */
 WS_DLL_PUBLIC
-gchar*     format_text(wmem_allocator_t* allocator, const guchar *line, size_t len);
+gchar*     format_text(wmem_allocator_t* allocator, const guchar *string, size_t len);
+
+/** Given a wmem scope and a null-terminated string, expected to be in
+ *  UTF-8 but possibly containing invalid sequences (as it may have come
+ *  from packet data), generate a valid UTF-8 string from it (its length
+ *  is taken with strlen()), allocated in the specified wmem scope, that:
+ *
+ *   shows printable Unicode characters as themselves;
+ *
+ *   shows non-printable ASCII characters as C-style escapes (octal
+ *   if not one of the standard ones such as LF -> '\n');
+ *
+ *   shows non-printable Unicode-but-not-ASCII characters as
+ *   their universal character names;
+ *
+ *   shows illegal UTF-8 sequences as a sequence of bytes represented
+ *   as C-style hex escapes;
+ *
+ *  and return a pointer to it.
+ *
+ * @param allocator The wmem scope
+ * @param string A pointer to the null-terminated input string
+ * @return A pointer to the formatted string
+ *
+ * @see tvb_format_text()
+ */
+WS_DLL_PUBLIC
+gchar*     format_text_string(wmem_allocator_t* allocator, const guchar *string);
 
 /**
  * Given a string, generate a string from it that shows non-printable
-- 
2.25.1