[FFmpeg-devel] [PATCH v2 6/6] avformat/subtitles: guess UTF-16 if no BOM present

wm4 nfxjfg at googlemail.com
Thu Sep 4 22:40:27 CEST 2014


---
Follows reimar's suggestion for detecting UTF-16. If the detection goes
wrong, probing the format won't succeed, so this should not break
anything.

I didn't use ffio_ensure_seekback(), because this apparently reallocates
the buffer - which we certainly don't want, because the probe API
doesn't allow this.

Probably too messy, feel free to drop.
---
 libavformat/subtitles.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
index 3069477..7d6a93c 100644
--- a/libavformat/subtitles.c
+++ b/libavformat/subtitles.c
@@ -26,6 +26,7 @@
 
 void ff_text_init_avio(FFTextReader *r, AVIOContext *pb)
 {
+    int can_seekback_2_bytes = pb->buf_end - pb->buf_ptr >= 2;
     int i;
     r->pb = pb;
     r->buf_pos = r->buf_len = 0;
@@ -38,6 +39,14 @@ void ff_text_init_avio(FFTextReader *r, AVIOContext *pb)
     } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) {
         r->type = FF_UTF16BE;
         r->buf_pos += 2;
+    } else if (can_seekback_2_bytes && r->buf[0] && !r->buf[1]) {
+        r->type = FF_UTF16LE; // with high probability
+        r->pb->buf_ptr -= 2;
+        r->buf_pos = r->buf_len = 0;
+    } else if (can_seekback_2_bytes && !r->buf[0] && r->buf[1]) {
+        r->type = FF_UTF16BE; // with high probability
+        r->pb->buf_ptr -= 2;
+        r->buf_pos = r->buf_len = 0;
     } else {
         r->buf[r->buf_len++] = avio_r8(r->pb);
         if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) {
-- 
2.1.0



More information about the ffmpeg-devel mailing list