From 35418a73f7c9cefebe392b1ea0f012fccaf89801 Mon Sep 17 00:00:00 2001 From: Guy Harris Date: Wed, 19 Aug 2020 23:58:20 -0700 Subject: [PATCH] Add format_text_string(), which gets the length with strlen(). format_text(alloc, string, strlen(string)) is a common idiom; provide format_text_string(), which does the strlen(string) for you. (Any string used in a %s to set the text of a protocol tree item, if it was directly extracted from the packet, should be run through a format_text routine, to ensure that it's valid UTF-8 and that control characters are handled correctly.) Update comments while we're at it. Change-Id: Ia8549efa1c96510ffce97178ed4ff7be4b02eb6e Reviewed-on: https://code.wireshark.org/review/38202 Petri-Dish: Guy Harris Tested-by: Petri Dish Buildbot Reviewed-by: Guy Harris Upstream-Status: Backport [https://gitlab.com/wireshark/wireshark/-/commit/35418a73f7c9cefebe392b1ea0f012fccaf89801] Comment: to backport fix for CVE-2023-0667, add function format_text_string(). Signed-off-by: Hitendra Prajapati --- epan/strutil.c | 33 ++++++++++++++++++++++++++++---- epan/strutil.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/epan/strutil.c b/epan/strutil.c index 347a173..bc3b19e 100644 --- a/epan/strutil.c +++ b/epan/strutil.c @@ -193,10 +193,11 @@ get_token_len(const guchar *linep, const guchar *lineend, #define UNPOOP 0x1F4A9 /* - * Given a string, expected to be in UTF-8 but possibly containing - * invalid sequences (as it may have come from packet data), generate - * a valid UTF-8 string from it, allocated with the specified wmem - * allocator, that: + * Given a wmem scope, a not-necessarily-null-terminated string, + * expected to be in UTF-8 but possibly containing invalid sequences + * (as it may have come from packet data), and the length of the string, + * generate a valid UTF-8 string from it, allocated in the specified + * wmem scope, that: * * shows printable Unicode characters as 
themselves; * @@ -493,6 +494,30 @@ format_text(wmem_allocator_t* allocator, const guchar *string, size_t len) return fmtbuf; } +/** Given a wmem scope and a null-terminated string, expected to be in + * UTF-8 but possibly containing invalid sequences (as it may have come + * from packet data), generate a valid + * UTF-8 string from it, allocated in the specified wmem scope, that: + * + * shows printable Unicode characters as themselves; + * + * shows non-printable ASCII characters as C-style escapes (octal + * if not one of the standard ones such as LF -> '\n'); + * + * shows non-printable Unicode-but-not-ASCII characters as + * their universal character names; + * + * shows illegal UTF-8 sequences as a sequence of bytes represented + * as C-style hex escapes; + * + * and return a pointer to it. + */ +gchar * +format_text_string(wmem_allocator_t* allocator, const guchar *string) +{ + return format_text(allocator, string, strlen(string)); +} + /* * Given a string, generate a string from it that shows non-printable * characters as C-style escapes except a whitespace character diff --git a/epan/strutil.h b/epan/strutil.h index 2046cb0..705beb5 100644 --- a/epan/strutil.h +++ b/epan/strutil.h @@ -46,18 +46,61 @@ WS_DLL_PUBLIC int get_token_len(const guchar *linep, const guchar *lineend, const guchar **next_token); -/** Given a string, generate a string from it that shows non-printable - * characters as C-style escapes, and return a pointer to it. 
+/** Given a wmem scope, a not-necessarily-null-terminated string, + * expected to be in UTF-8 but possibly containing invalid sequences + * (as it may have come from packet data), and the length of the string, + * generate a valid UTF-8 string from it, allocated in the specified + * wmem scope, that: + * + * shows printable Unicode characters as themselves; + * + * shows non-printable ASCII characters as C-style escapes (octal + * if not one of the standard ones such as LF -> '\n'); + * + * shows non-printable Unicode-but-not-ASCII characters as + * their universal character names; + * + * shows illegal UTF-8 sequences as a sequence of bytes represented + * as C-style hex escapes; + * + * and return a pointer to it. * * @param allocator The wmem scope - * @param line A pointer to the input string + * @param string A pointer to the input string * @param len The length of the input string * @return A pointer to the formatted string * * @see tvb_format_text() */ WS_DLL_PUBLIC -gchar* format_text(wmem_allocator_t* allocator, const guchar *line, size_t len); +gchar* format_text(wmem_allocator_t* allocator, const guchar *string, size_t len); + +/** Given a wmem scope and a null-terminated string, expected to be in + * UTF-8 but possibly containing invalid sequences (as it may have come + * from packet data), generate a valid + * UTF-8 string from it, allocated in the specified wmem scope, that: + * + * shows printable Unicode characters as themselves; + * + * shows non-printable ASCII characters as C-style escapes (octal + * if not one of the standard ones such as LF -> '\n'); + * + * shows non-printable Unicode-but-not-ASCII characters as + * their universal character names; + * + * shows illegal UTF-8 sequences as a sequence of bytes represented + * as C-style hex escapes; + * + * and return a pointer to it. 
+ * + * @param allocator The wmem scope + * @param string A pointer to the null-terminated input string + * @return A pointer to the formatted string + * + * @see tvb_format_text() + */ +WS_DLL_PUBLIC +gchar* format_text_string(wmem_allocator_t* allocator, const guchar *string); /** * Given a string, generate a string from it that shows non-printable -- 2.25.1