https://bugs.gentoo.org/974285
https://gstreamer.freedesktop.org/security/sa-2026-0023.html
https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/11237

From a26b58bfef4e8562c5e85b57896ba5fc1f7c64a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= <sebastian@centricular.com>
Date: Mon, 30 Mar 2026 20:40:26 +0300
Subject: [PATCH 1/3] subparse: Replace a `g_strv_length()` in a loop header
 with more efficient code

It doesn't really matter in this case as the array is small.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/11237>
--- a/gst/subparse/gstsubparse.c
+++ b/gst/subparse/gstsubparse.c
@@ -915,56 +915,53 @@ static void
 parse_webvtt_cue_settings (ParserState * state, const gchar * settings)
 {
   gchar **splitted_settings = g_strsplit_set (settings, " \t", -1);
-  gint i = 0;
   gint16 text_position, text_size;
   gint16 line_position;
   gboolean vertical_found = FALSE;
   gboolean alignment_found = FALSE;
 
-  while (i < g_strv_length (splitted_settings)) {
+  for (gchar ** setting_ptr = splitted_settings; *setting_ptr; setting_ptr++) {
     gboolean valid_tag = FALSE;
-    switch (splitted_settings[i][0]) {
+    switch ((*setting_ptr)[0]) {
       case 'T':
-        if (sscanf (splitted_settings[i], "T:%" G_GINT16_FORMAT "%%",
+        if (sscanf (*setting_ptr, "T:%" G_GINT16_FORMAT "%%",
                 &text_position) > 0) {
           state->text_position = (guint8) text_position;
           valid_tag = TRUE;
         }
         break;
       case 'D':
-        if (strlen (splitted_settings[i]) > 2) {
+        if (strlen (*setting_ptr) > 2) {
           vertical_found = TRUE;
           g_free (state->vertical);
-          state->vertical = g_strdup (splitted_settings[i] + 2);
+          state->vertical = g_strdup (*setting_ptr + 2);
           valid_tag = TRUE;
         }
         break;
       case 'L':
-        if (g_str_has_suffix (splitted_settings[i], "%")) {
-          if (sscanf (splitted_settings[i], "L:%" G_GINT16_FORMAT "%%",
+        if (g_str_has_suffix (*setting_ptr, "%")) {
+          if (sscanf (*setting_ptr, "L:%" G_GINT16_FORMAT "%%",
                   &line_position) > 0) {
             state->line_position = line_position;
             valid_tag = TRUE;
           }
         } else {
-          if (sscanf (splitted_settings[i], "L:%" G_GINT16_FORMAT,
-                  &line_position) > 0) {
+          if (sscanf (*setting_ptr, "L:%" G_GINT16_FORMAT, &line_position) > 0) {
             state->line_number = line_position;
             valid_tag = TRUE;
           }
         }
         break;
       case 'S':
-        if (sscanf (splitted_settings[i], "S:%" G_GINT16_FORMAT "%%",
-                &text_size) > 0) {
+        if (sscanf (*setting_ptr, "S:%" G_GINT16_FORMAT "%%", &text_size) > 0) {
           state->text_size = (guint8) text_size;
           valid_tag = TRUE;
         }
         break;
       case 'A':
-        if (strlen (splitted_settings[i]) > 2) {
+        if (strlen (*setting_ptr) > 2) {
           g_free (state->alignment);
-          state->alignment = g_strdup (splitted_settings[i] + 2);
+          state->alignment = g_strdup (*setting_ptr + 2);
           alignment_found = TRUE;
           valid_tag = TRUE;
         }
@@ -973,10 +970,8 @@ parse_webvtt_cue_settings (ParserState * state, const gchar * settings)
         break;
     }
     if (!valid_tag) {
-      GST_LOG ("Invalid or unrecognised setting found: %s",
-          splitted_settings[i]);
+      GST_LOG ("Invalid or unrecognised setting found: %s", *setting_ptr);
     }
-    i++;
   }
   g_strfreev (splitted_settings);
   if (!vertical_found) {
-- 
GitLab


From 447191a31806372d200922086213c687ba4f30ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= <sebastian@centricular.com>
Date: Mon, 30 Mar 2026 20:39:08 +0300
Subject: [PATCH 2/3] subparse: Replace regex string matching / replacing with
 plain C string parsing

The regexes had suboptimal behaviour that easily took 30s on a 1kB long line and
probably had quadratic behaviour in the length of the string.

Fixes https://gitlab.freedesktop.org/gstreamer/gstreamer/-/work_items/5002

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/11237>
--- a/gst/subparse/gstsubparse.c
+++ b/gst/subparse/gstsubparse.c
@@ -41,7 +41,6 @@
 #include "gstsubparseelements.h"
 
 #define DEFAULT_ENCODING   NULL
-#define ATTRIBUTE_REGEX "\\s?[a-zA-Z0-9\\. \t\\(\\)]*"
 static const gchar *allowed_srt_tags[] = { "i", "b", "u", NULL };
 static const gchar *allowed_vtt_tags[] =
     { "i", "b", "c", "u", "v", "ruby", "rt", NULL };
@@ -639,50 +638,112 @@ strip_trailing_newlines (gchar * txt)
  * escaping everything (the text between these simple markers isn't
  * necessarily escaped, so it seems best to do it like this) */
 static void
-subrip_unescape_formatting (gchar * txt, gconstpointer allowed_tags_ptr,
+subrip_unescape_formatting (gchar * txt, gchar ** allowed_tags,
     gboolean allows_tag_attributes)
 {
-  gchar *res;
-  GRegex *tag_regex;
-  gchar *allowed_tags_pattern, *search_pattern;
-  const gchar *replace_pattern;
+  const gchar *p;
+  GString *out;
 
   /* No processing needed if no escaped tag marker found in the string. */
   if (strstr (txt, "&lt;") == NULL)
     return;
 
-  /* Build a list of alternates for our regexp.
-   * FIXME: Could be built once and stored */
-  allowed_tags_pattern = g_strjoinv ("|", (gchar **) allowed_tags_ptr);
-  /* Look for starting/ending escaped tags with optional attributes. */
-  search_pattern = g_strdup_printf ("&lt;(/)?\\ *(%s)(%s)&gt;",
-      allowed_tags_pattern, ATTRIBUTE_REGEX);
-  /* And unescape appropriately */
-  if (allows_tag_attributes) {
-    replace_pattern = "<\\1\\2\\3>";
-  } else {
-    replace_pattern = "<\\1\\2>";
-  }
+  out = g_string_new ("");
+  p = txt;
 
-  tag_regex = g_regex_new (search_pattern, 0, 0, NULL);
-  res = g_regex_replace (tag_regex, txt, strlen (txt), 0,
-      replace_pattern, 0, NULL);
+  while (*p) {
+    const gchar *lt;
+    const gchar *gt;
 
-  /* Replacing can fail. Return an empty string in that case. */
-  if (!res) {
-    strcpy (txt, "");
-    return;
-  }
+    /* Find next &lt; */
+    lt = strstr (p, "&lt;");
+    if (!lt) {
+      /* No more &lt; found - copy remainder and done */
+      g_string_append (out, p);
+      break;
+    }
 
-  /* res will always be shorter than the input or identical, so this
-   * copy is OK */
-  strcpy (txt, res);
+    /* Copy everything before &lt; */
+    g_string_append_len (out, p, lt - p);
+
+    /* Skip &lt; */
+    lt += 4;
+
+    /* Find matching &gt; */
+    gt = strstr (lt, "&gt;");
+    if (!gt) {
+      /* No closing &gt; - copy everything until the end as is and end */
+      g_string_append (out, lt - 4);
+      break;
+    }
+
+    /* Check for optional closing tag / */
+    gboolean is_closing = FALSE;
+    const gchar *tag_start = lt;
+    if (*tag_start == '/') {
+      is_closing = TRUE;
+      tag_start++;
+    }
+
+    /* Skip optional whitespace before tag name */
+    while (*tag_start == ' ' || *tag_start == '\t')
+      tag_start++;
+
+    /* Extract tag name */
+    const gchar *tag_end = tag_start;
+    while (g_ascii_isalnum (*tag_end))
+      tag_end++;
+    gsize tag_len = tag_end - tag_start;
+
+    /* Check if tag is allowed */
+    gboolean allowed = FALSE;
+    gchar **tag_ptr;
+    for (tag_ptr = allowed_tags; *tag_ptr; tag_ptr++) {
+      if (strlen (*tag_ptr) == tag_len &&
+          strncmp (*tag_ptr, tag_start, tag_len) == 0) {
+        allowed = TRUE;
+        break;
+      }
+    }
+
+    if (!allowed) {
+      /* Tag not allowed - copy everything between and including &lt;...&gt; as is */
+      g_string_append_len (out, lt - 4, gt + 4 - (lt - 4));
+      p = gt + 4;
+      continue;
+    }
 
-  g_free (res);
-  g_free (search_pattern);
-  g_free (allowed_tags_pattern);
+    /* Otherwise handle allowed tag by unescaping < */
+    g_string_append_c (out, '<');
+    if (is_closing)
+      g_string_append_c (out, '/');
+    g_string_append_len (out, tag_start, tag_len);
+
+    /* If attributes allowed then copy them over, otherwise ignore them */
+    if (allows_tag_attributes) {
+      /* Scan for optional attributes */
+      const gchar *attr_start = tag_end;
+
+      /* Find attributes end */
+      while (tag_end < gt &&
+          (g_ascii_isalnum (*tag_end) || *tag_end == '.' ||
+              *tag_end == ' ' || *tag_end == '\t' ||
+              *tag_end == '(' || *tag_end == ')'))
+        tag_end++;
+
+      /* Copy attributes */
+      g_string_append_len (out, attr_start, tag_end - attr_start);
+    }
 
-  g_regex_unref (tag_regex);
+    /* Append closing > and skip to next */
+    g_string_append_c (out, '>');
+    p = gt + 4;
+  }
+
+  /* out will always be shorter than the input or identical, so this
+   * copy is OK */
+  strcpy (txt, out->str);
+  g_string_free (out, TRUE);
 }
 
 
@@ -727,19 +788,11 @@ subrip_remove_unhandled_tags (gchar * txt)
  * input! This function adds missing closing markup tags and removes
  * broken closing tags for tags that have never been opened. */
 static void
-subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
+subrip_fix_up_markup (gchar ** p_txt, gchar ** allowed_tags)
 {
   gchar *cur, *next_tag;
   GPtrArray *open_tags = NULL;
   guint num_open_tags = 0;
-  const gchar *iter_tag;
-  guint offset = 0;
-  guint index;
-  gchar *cur_tag;
-  gchar *end_tag;
-  GRegex *tag_regex;
-  GMatchInfo *match_info;
-  gchar **allowed_tags = (gchar **) allowed_tags_ptr;
 
   g_assert (*p_txt != NULL);
 
@@ -749,44 +802,75 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
     next_tag = strchr (cur, '<');
     if (next_tag == NULL)
       break;
-    offset = 0;
-    index = 0;
-    while (index < g_strv_length (allowed_tags)) {
-      iter_tag = allowed_tags[index];
-      /* Look for a white listed tag */
-      cur_tag = g_strconcat ("<", iter_tag, ATTRIBUTE_REGEX, ">", NULL);
-      tag_regex = g_regex_new (cur_tag, 0, 0, NULL);
-      (void) g_regex_match (tag_regex, next_tag, 0, &match_info);
-
-      if (g_match_info_matches (match_info)) {
-        gint start_pos, end_pos;
-        gchar *word = g_match_info_fetch (match_info, 0);
-        g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos);
-        if (start_pos == 0) {
-          offset = strlen (word);
+
+    /* Look for allowed tag */
+    guint offset = 0;
+    gboolean is_closing = FALSE;
+    for (gchar ** tag_ptr = allowed_tags; *tag_ptr; tag_ptr++) {
+      const gchar *tag_start = next_tag + 1;
+
+      is_closing = (*tag_start == '/');
+      if (is_closing)
+        tag_start++;
+
+      /* Skip optional whitespace before tag start */
+      while (*tag_start == ' ' || *tag_start == '\t')
+        tag_start++;
+
+      /* Extract tag name length */
+      const gchar *tag_end = tag_start;
+      while (g_ascii_isalnum (*tag_end))
+        tag_end++;
+      gsize tag_len = tag_end - tag_start;
+
+      /* Check if tag name matches */
+      if (strlen (*tag_ptr) == tag_len &&
+          g_ascii_strncasecmp (*tag_ptr, tag_start, tag_len) == 0) {
+        /* Found allowed tag - calculate offset to position after tag */
+
+        /* Check for optional attributes */
+        if (*tag_end == ' ' || *tag_end == '\t' || *tag_end == '.') {
+          while (*tag_end && *tag_end != '>' &&
+              (g_ascii_isalnum (*tag_end) || *tag_end == '.' ||
+                  *tag_end == ' ' || *tag_end == '\t' ||
+                  *tag_end == '(' || *tag_end == ')')) {
+            tag_end++;
+          }
+        }
+
+        /* Check for closing >, if not found skip over this */
+        if (*tag_end == '>') {
+          offset = tag_end - (next_tag + 1);
+
+          /* Full opening tag found, let's keep track of it and continue */
+          if (!is_closing) {
+            g_ptr_array_add (open_tags, g_ascii_strdown (*tag_ptr, -1));
+            ++num_open_tags;
+          }
+          break;
         }
-        g_free (word);
-      }
-      g_match_info_free (match_info);
-      g_regex_unref (tag_regex);
-      g_free (cur_tag);
-      index++;
-      if (offset) {
-        /* OK we found a tag, let's keep track of it */
-        g_ptr_array_add (open_tags, g_ascii_strdown (iter_tag, -1));
-        ++num_open_tags;
-        break;
       }
+
+      /* No closing > found, continue */
+      offset = 0;
+    }
+
+    /* Not a valid tag - skip to next */
+    if (offset == 0) {
+      ++next_tag;
+      cur = next_tag;
+      continue;
     }
 
-    if (offset) {
+    /* Not a closing tag - skip to the next */
+    if (!is_closing) {
       next_tag += offset;
       cur = next_tag;
       continue;
     }
 
     if (*next_tag == '<' && *(next_tag + 1) == '/') {
-      end_tag = strchr (next_tag, '>');
+      gchar *end_tag = strchr (next_tag, '>');
       if (end_tag) {
         const gchar *last = NULL;
         if (num_open_tags > 0)
@@ -810,6 +894,7 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr)
     cur = next_tag;
   }
 
+  /* if there are still open tags, close them all at the end */
   if (num_open_tags > 0) {
     GString *s;
 
--- a/gst/subparse/gstsubparse.h
+++ b/gst/subparse/gstsubparse.h
@@ -48,7 +48,7 @@ typedef struct {
   guint8 text_size;          /* percent value */
   gchar *vertical;        /* "", "vertical", "vertical-lr" */
   gchar *alignment;       /* "", "start", "middle", "end" */
-  gconstpointer allowed_tags; /* list of markup tags allowed in the cue text. */
+  gchar **allowed_tags; /* list of markup tags allowed in the cue text. */
   gboolean allows_tag_attributes;
 } ParserState;
 
-- 
GitLab


From 9c4f69eebc86eca2b3881a9ea7a524eb70687793 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= <sebastian@centricular.com>
Date: Mon, 30 Mar 2026 23:37:04 +0300
Subject: [PATCH 3/3] subparse: Fix handling of closing of multi-byte tags

They were previously wrongly considered as wrong tags due to using wrong
byte offsets into the string that only happened to match for single-byte tags.

Add a test for this case too.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/11237>
--- a/gst/subparse/gstsubparse.c
+++ b/gst/subparse/gstsubparse.c
@@ -869,29 +869,24 @@ subrip_fix_up_markup (gchar ** p_txt, gchar ** allowed_tags)
       continue;
     }
 
-    if (*next_tag == '<' && *(next_tag + 1) == '/') {
-      gchar *end_tag = strchr (next_tag, '>');
-      if (end_tag) {
-        const gchar *last = NULL;
-        if (num_open_tags > 0)
-          last = g_ptr_array_index (open_tags, num_open_tags - 1);
-        if (num_open_tags == 0
-            || g_ascii_strncasecmp (end_tag - 1, last, strlen (last))) {
-          GST_LOG ("broken input, closing tag '%s' is not open", next_tag);
-          /* Move everything after the tag end, including closing \0 */
-          memmove (next_tag, end_tag + 1, strlen (end_tag));
-          cur = next_tag;
-          continue;
-        } else {
-          --num_open_tags;
-          g_ptr_array_remove_index (open_tags, num_open_tags);
-          cur = end_tag + 1;
-          continue;
-        }
-      }
+    /* Otherwise a closing tag */
+    gchar *tag_end = strchr (next_tag, '>');
+    const gchar *last = NULL;
+    if (num_open_tags > 0)
+      last = g_ptr_array_index (open_tags, num_open_tags - 1);
+    /* Check if the closing tag is the last tag that was opened */
+    if (num_open_tags == 0
+        || g_ascii_strncasecmp (next_tag + 2, last, strlen (last)) != 0) {
+      GST_LOG ("broken input, closing tag '%s' is not open", next_tag);
+      /* Skip over the tag by moving everything after the tag end, including closing \0 */
+      memmove (next_tag, tag_end + 1, strlen (tag_end + 1) + 1);
+      cur = next_tag;
+      continue;
     }
-    ++next_tag;
-    cur = next_tag;
+
+    --num_open_tags;
+    g_ptr_array_remove_index (open_tags, num_open_tags);
+    cur = tag_end + 1;
   }
 
   /* if there are still open tags, close them all at the end */
--- a/tests/check/elements/subparse.c
+++ b/tests/check/elements/subparse.c
@@ -504,9 +504,25 @@ GST_START_TEST (test_webvtt)
     ,
   };
 
+  /* Tests with a wrong multi-character closing tags before the end of the line */
+  SubParseInputChunk webvtt_input3[] = {
+    {
+          "1\n00:00:00,000 --> 00:00:01,000\n<ruby>Hello!</ruby>World!\n\n",
+        0 * GST_SECOND, 1 * GST_SECOND, "<ruby>Hello!</ruby>World!"},
+    {
+          "1\n00:00:01,000 --> 00:00:02,000\n<ruby>Hello!</i></ruby>World!\n\n",
+        1 * GST_SECOND, 2 * GST_SECOND, "<ruby>Hello!</ruby>World!"}
+    ,
+    {
+          "1\n00:00:02,000 --> 00:00:03,000\n<i>World!</ruby></i>Hello!\n\n",
+        2 * GST_SECOND, 3 * GST_SECOND, "<i>World!</i>Hello!"}
+    ,
+  };
+
   test_vtt_do_test (webvtt_input, 0, G_N_ELEMENTS (webvtt_input));
   test_vtt_do_test (webvtt_input1, 0, G_N_ELEMENTS (webvtt_input1));
   test_vtt_do_test (webvtt_input2, 0, G_N_ELEMENTS (webvtt_input2));
+  test_vtt_do_test (webvtt_input3, 0, G_N_ELEMENTS (webvtt_input3));
 }
 
 GST_END_TEST;
-- 
GitLab

