[FFmpeg-devel] [PATCH v2 1/6] avformat/avlanguage: add ff_bcp47_parse() for parsing BCP47 locales
Marth64
marth64 at proxyid.net
Wed Nov 27 08:09:22 EET 2024
Signed-off-by: Marth64 <marth64 at proxyid.net>
---
libavformat/avlanguage.c | 85 ++++++++++++++++++++++++++++++++++++++++
libavformat/avlanguage.h | 7 ++++
2 files changed, 92 insertions(+)
diff --git a/libavformat/avlanguage.c b/libavformat/avlanguage.c
index 202d9aa835..eef2ad8ff7 100644
--- a/libavformat/avlanguage.c
+++ b/libavformat/avlanguage.c
@@ -19,7 +19,10 @@
*/
#include "avlanguage.h"
+#include "libavutil/avstring.h"
+#include "libavutil/error.h"
#include "libavutil/macros.h"
+#include "libavutil/mem.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -768,3 +771,85 @@ const char *ff_convert_lang_to(const char *lang, enum AVLangCodespace target_cod
return NULL;
}
+
+/**
+ * Check that one BCP47 component is well-formed: at most 8 characters long
+ * and made up solely of ASCII letters and digits.
+ *
+ * An empty string passes the check as well, since it contains no offending
+ * character.
+ *
+ * @param s NUL-terminated component to check
+ * @return 1 if s passes the check, 0 otherwise
+ */
+static int bcp47_validate_subtag(const char *s)
+{
+    const char *p;
+
+    if (strlen(s) > 8)
+        return 0;
+
+    for (p = s; *p; p++) {
+        const int is_lower = *p >= 'a' && *p <= 'z';
+        const int is_upper = *p >= 'A' && *p <= 'Z';
+        const int is_digit = *p >= '0' && *p <= '9';
+
+        if (!is_lower && !is_upper && !is_digit)
+            return 0;
+    }
+
+    return 1;
+}
+
+/**
+ * Parse a BCP47 locale string such as "en-US" or "zh-Hant-TW".
+ *
+ * The string is split on '-'. Every component has to pass
+ * bcp47_validate_subtag(); the first one must additionally be a language
+ * code that ff_convert_lang_to() can map to AV_LANG_ISO639_2_BIBL.
+ *
+ * @param s          locale string to parse
+ * @param language   on success, set to the string returned by
+ *                   ff_convert_lang_to() for the first component
+ * @param subtags    on success, set to an array holding an av_strdup()ed
+ *                   copy of every component after the first (NULL if there
+ *                   are none)
+ * @param nb_subtags on success, set to the number of entries in *subtags
+ * @return 0 on success, a negative AVERROR code on failure, in which case
+ *         *language and *subtags are set to NULL and *nb_subtags to 0
+ */
+const int ff_bcp47_parse(const char *s, char **language, char ***subtags, int *nb_subtags)
+{
+    int ret;
+    char *tok_tmp = NULL, *tok_part, *tok_saveptr;
+
+    const char *parsed_language = NULL;
+    char **parsed_subtags = NULL;
+    int parsed_nb_subtags = 0;
+
+    if (!s) {
+        ret = AVERROR(EINVAL);
+        goto end_fail;
+    }
+
+    /* av_strtok() writes into its input, so tokenize a private copy */
+    tok_tmp = av_strdup(s);
+    if (!tok_tmp) {
+        ret = AVERROR(ENOMEM);
+        goto end_fail;
+    }
+
+    for (tok_part = av_strtok(tok_tmp, "-", &tok_saveptr); tok_part;
+         tok_part = av_strtok(NULL, "-", &tok_saveptr)) {
+        char *new_subtag;
+
+        if (!bcp47_validate_subtag(tok_part)) {
+            ret = AVERROR_INVALIDDATA;
+            goto end_fail;
+        }
+
+        /* ensure the first component is a language code we recognize;
+         * parsed_language stays NULL only until that component is accepted */
+        if (!parsed_language) {
+            parsed_language = ff_convert_lang_to(tok_part, AV_LANG_ISO639_2_BIBL);
+            if (!parsed_language) {
+                ret = AVERROR_INVALIDDATA;
+                goto end_fail;
+            }
+            continue;
+        }
+
+        new_subtag = av_strdup(tok_part);
+        if (!new_subtag) {
+            ret = AVERROR(ENOMEM);
+            goto end_fail;
+        }
+
+        ret = av_dynarray_add_nofree(&parsed_subtags, &parsed_nb_subtags, new_subtag);
+        if (ret < 0) {
+            /* the copy was not stored in the array, so the cleanup loop
+             * below would never see it; release it here */
+            av_free(new_subtag);
+            goto end_fail;
+        }
+    }
+
+    /* the tokenizer produced nothing at all (e.g. an empty string), so
+     * there is no language to report */
+    if (!parsed_language) {
+        ret = AVERROR_INVALIDDATA;
+        goto end_fail;
+    }
+
+    av_free(tok_tmp);
+
+    *language = (char *)parsed_language;
+    *subtags = parsed_subtags;
+    *nb_subtags = parsed_nb_subtags;
+
+    return 0;
+
+end_fail:
+    for (int i = 0; i < parsed_nb_subtags; i++)
+        av_free(parsed_subtags[i]);
+    av_freep(&parsed_subtags);
+
+    av_free(tok_tmp);
+
+    *language = NULL;
+    *subtags = NULL;
+    *nb_subtags = 0;
+
+    return ret;
+}
diff --git a/libavformat/avlanguage.h b/libavformat/avlanguage.h
index 1901e78407..7a6e88832d 100644
--- a/libavformat/avlanguage.h
+++ b/libavformat/avlanguage.h
@@ -36,4 +36,11 @@ enum AVLangCodespace {
*/
const char *ff_convert_lang_to(const char *lang, enum AVLangCodespace target_codespace);
+/**
+ * Parse a BCP47 locale string into its respective language (as AV_LANG_ISO639_2_BIBL)
+ * and an array of all subtags, validating each component along the way.
+ *
+ * @param s          BCP47 locale string whose components are separated by '-'
+ * @param language   receives the language code derived from the first component
+ * @param subtags    receives an allocated array holding a copy of each component
+ *                   that follows the language
+ * @param nb_subtags receives the number of entries stored in *subtags
+ * @return 0 on success, <0 on error (and language, subtags will be set to NULL)
+ */
+const int ff_bcp47_parse(const char *s, char **language, char ***subtags, int *nb_subtags);
+
#endif /* AVFORMAT_AVLANGUAGE_H */
--
2.34.1
More information about the ffmpeg-devel
mailing list