FFmpeg
opus_parse.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Andrew D'Addesio
3  * Copyright (c) 2013-2014 Mozilla Corporation
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Opus decoder/parser shared code
25  */
26 
27 #include "libavutil/attributes.h"
29 #include "libavutil/error.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/log.h"
32 #include "libavutil/mem.h"
33 
34 #include "avcodec.h"
35 #include "internal.h"
36 #include "mathops.h"
37 #include "opus.h"
38 #include "opus_parse.h"
39 #include "vorbis_data.h"
40 
/*
 * Duration of a single frame for each of the 32 TOC configurations; the table
 * is indexed with the 5-bit pkt->config value in ff_opus_parse_packet().
 * The grouping below follows the mode selection done there
 * (config < 12: SILK, config < 16: hybrid, otherwise CELT).
 * NOTE(review): values are presumably sample counts at 48 kHz - confirm.
 */
static const uint16_t opus_frame_duration[32] = {
    /* configs  0 - 11 */
    480, 960, 1920, 2880,
    480, 960, 1920, 2880,
    480, 960, 1920, 2880,
    /* configs 12 - 15 */
    480, 960,
    480, 960,
    /* configs 16 - 31 */
    120, 240,  480,  960,
    120, 240,  480,  960,
    120, 240,  480,  960,
    120, 240,  480,  960,
};
52 
53 /**
54  * Read a 1- or 2-byte frame length
55  */
56 static inline int xiph_lacing_16bit(const uint8_t **ptr, const uint8_t *end)
57 {
58  int val;
59 
60  if (*ptr >= end)
61  return AVERROR_INVALIDDATA;
62  val = *(*ptr)++;
63  if (val >= 252) {
64  if (*ptr >= end)
65  return AVERROR_INVALIDDATA;
66  val += 4 * *(*ptr)++;
67  }
68  return val;
69 }
70 
71 /**
72  * Read a multi-byte length (used for code 3 packet padding size)
73  */
74 static inline int xiph_lacing_full(const uint8_t **ptr, const uint8_t *end)
75 {
76  int val = 0;
77  int next;
78 
79  while (1) {
80  if (*ptr >= end || val > INT_MAX - 254)
81  return AVERROR_INVALIDDATA;
82  next = *(*ptr)++;
83  val += next;
84  if (next < 255)
85  break;
86  else
87  val--;
88  }
89  return val;
90 }
91 
92 /**
93  * Parse Opus packet info from raw packet data
94  */
95 int ff_opus_parse_packet(OpusPacket *pkt, const uint8_t *buf, int buf_size,
96  int self_delimiting)
97 {
98  const uint8_t *ptr = buf;
99  const uint8_t *end = buf + buf_size;
100  int padding = 0;
101  int frame_bytes, i;
102 
103  if (buf_size < 1)
104  goto fail;
105 
106  /* TOC byte */
107  i = *ptr++;
108  pkt->code = (i ) & 0x3;
109  pkt->stereo = (i >> 2) & 0x1;
110  pkt->config = (i >> 3) & 0x1F;
111 
112  /* code 2 and code 3 packets have at least 1 byte after the TOC */
113  if (pkt->code >= 2 && buf_size < 2)
114  goto fail;
115 
116  switch (pkt->code) {
117  case 0:
118  /* 1 frame */
119  pkt->frame_count = 1;
120  pkt->vbr = 0;
121 
122  if (self_delimiting) {
123  int len = xiph_lacing_16bit(&ptr, end);
124  if (len < 0 || len > end - ptr)
125  goto fail;
126  end = ptr + len;
127  buf_size = end - buf;
128  }
129 
130  frame_bytes = end - ptr;
131  if (frame_bytes > OPUS_MAX_FRAME_SIZE)
132  goto fail;
133  pkt->frame_offset[0] = ptr - buf;
134  pkt->frame_size[0] = frame_bytes;
135  break;
136  case 1:
137  /* 2 frames, equal size */
138  pkt->frame_count = 2;
139  pkt->vbr = 0;
140 
141  if (self_delimiting) {
142  int len = xiph_lacing_16bit(&ptr, end);
143  if (len < 0 || 2 * len > end - ptr)
144  goto fail;
145  end = ptr + 2 * len;
146  buf_size = end - buf;
147  }
148 
149  frame_bytes = end - ptr;
150  if (frame_bytes & 1 || frame_bytes >> 1 > OPUS_MAX_FRAME_SIZE)
151  goto fail;
152  pkt->frame_offset[0] = ptr - buf;
153  pkt->frame_size[0] = frame_bytes >> 1;
154  pkt->frame_offset[1] = pkt->frame_offset[0] + pkt->frame_size[0];
155  pkt->frame_size[1] = frame_bytes >> 1;
156  break;
157  case 2:
158  /* 2 frames, different sizes */
159  pkt->frame_count = 2;
160  pkt->vbr = 1;
161 
162  /* read 1st frame size */
163  frame_bytes = xiph_lacing_16bit(&ptr, end);
164  if (frame_bytes < 0)
165  goto fail;
166 
167  if (self_delimiting) {
168  int len = xiph_lacing_16bit(&ptr, end);
169  if (len < 0 || len + frame_bytes > end - ptr)
170  goto fail;
171  end = ptr + frame_bytes + len;
172  buf_size = end - buf;
173  }
174 
175  pkt->frame_offset[0] = ptr - buf;
176  pkt->frame_size[0] = frame_bytes;
177 
178  /* calculate 2nd frame size */
179  frame_bytes = end - ptr - pkt->frame_size[0];
180  if (frame_bytes < 0 || frame_bytes > OPUS_MAX_FRAME_SIZE)
181  goto fail;
182  pkt->frame_offset[1] = pkt->frame_offset[0] + pkt->frame_size[0];
183  pkt->frame_size[1] = frame_bytes;
184  break;
185  case 3:
186  /* 1 to 48 frames, can be different sizes */
187  i = *ptr++;
188  pkt->frame_count = (i ) & 0x3F;
189  padding = (i >> 6) & 0x01;
190  pkt->vbr = (i >> 7) & 0x01;
191 
192  if (pkt->frame_count == 0 || pkt->frame_count > OPUS_MAX_FRAMES)
193  goto fail;
194 
195  /* read padding size */
196  if (padding) {
197  padding = xiph_lacing_full(&ptr, end);
198  if (padding < 0)
199  goto fail;
200  }
201 
202  /* read frame sizes */
203  if (pkt->vbr) {
204  /* for VBR, all frames except the final one have their size coded
205  in the bitstream. the last frame size is implicit. */
206  int total_bytes = 0;
207  for (i = 0; i < pkt->frame_count - 1; i++) {
208  frame_bytes = xiph_lacing_16bit(&ptr, end);
209  if (frame_bytes < 0)
210  goto fail;
211  pkt->frame_size[i] = frame_bytes;
212  total_bytes += frame_bytes;
213  }
214 
215  if (self_delimiting) {
216  int len = xiph_lacing_16bit(&ptr, end);
217  if (len < 0 || len + total_bytes + padding > end - ptr)
218  goto fail;
219  end = ptr + total_bytes + len + padding;
220  buf_size = end - buf;
221  }
222 
223  frame_bytes = end - ptr - padding;
224  if (total_bytes > frame_bytes)
225  goto fail;
226  pkt->frame_offset[0] = ptr - buf;
227  for (i = 1; i < pkt->frame_count; i++)
228  pkt->frame_offset[i] = pkt->frame_offset[i-1] + pkt->frame_size[i-1];
229  pkt->frame_size[pkt->frame_count-1] = frame_bytes - total_bytes;
230  } else {
231  /* for CBR, the remaining packet bytes are divided evenly between
232  the frames */
233  if (self_delimiting) {
234  frame_bytes = xiph_lacing_16bit(&ptr, end);
235  if (frame_bytes < 0 || pkt->frame_count * frame_bytes + padding > end - ptr)
236  goto fail;
237  end = ptr + pkt->frame_count * frame_bytes + padding;
238  buf_size = end - buf;
239  } else {
240  frame_bytes = end - ptr - padding;
241  if (frame_bytes % pkt->frame_count ||
242  frame_bytes / pkt->frame_count > OPUS_MAX_FRAME_SIZE)
243  goto fail;
244  frame_bytes /= pkt->frame_count;
245  }
246 
247  pkt->frame_offset[0] = ptr - buf;
248  pkt->frame_size[0] = frame_bytes;
249  for (i = 1; i < pkt->frame_count; i++) {
250  pkt->frame_offset[i] = pkt->frame_offset[i-1] + pkt->frame_size[i-1];
251  pkt->frame_size[i] = frame_bytes;
252  }
253  }
254  }
255 
256  pkt->packet_size = buf_size;
257  pkt->data_size = pkt->packet_size - padding;
258 
259  /* total packet duration cannot be larger than 120ms */
260  pkt->frame_duration = opus_frame_duration[pkt->config];
261  if (pkt->frame_duration * pkt->frame_count > OPUS_MAX_PACKET_DUR)
262  goto fail;
263 
264  /* set mode and bandwidth */
265  if (pkt->config < 12) {
266  pkt->mode = OPUS_MODE_SILK;
267  pkt->bandwidth = pkt->config >> 2;
268  } else if (pkt->config < 16) {
269  pkt->mode = OPUS_MODE_HYBRID;
270  pkt->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND + (pkt->config >= 14);
271  } else {
272  pkt->mode = OPUS_MODE_CELT;
273  pkt->bandwidth = (pkt->config - 16) >> 2;
274  /* skip medium band */
275  if (pkt->bandwidth)
276  pkt->bandwidth++;
277  }
278 
279  return 0;
280 
281 fail:
282  memset(pkt, 0, sizeof(*pkt));
283  return AVERROR_INVALIDDATA;
284 }
285 
286 static int channel_reorder_vorbis(int nb_channels, int channel_idx)
287 {
288  return ff_vorbis_channel_layout_offsets[nb_channels - 1][channel_idx];
289 }
290 
/**
 * Identity reordering: the channel index is returned unchanged and the
 * channel count is ignored.
 */
static int channel_reorder_unknown(int nb_channels, int channel_idx)
{
    (void)nb_channels; /* only present to match the reorder callback type */

    return channel_idx;
}
295 
298 {
299  static const uint8_t default_channel_map[2] = { 0, 1 };
300 
301  int (*channel_reorder)(int, int) = channel_reorder_unknown;
302  int channels = avctx->ch_layout.nb_channels;
303 
304  const uint8_t *extradata, *channel_map;
305  int extradata_size;
306  int version, map_type, streams, stereo_streams, i, j, ret;
307  AVChannelLayout layout = { 0 };
308 
309  if (!avctx->extradata) {
310  if (channels > 2) {
311  av_log(avctx, AV_LOG_ERROR,
312  "Multichannel configuration without extradata.\n");
313  return AVERROR(EINVAL);
314  }
315  extradata = opus_default_extradata;
316  extradata_size = sizeof(opus_default_extradata);
317  } else {
318  extradata = avctx->extradata;
319  extradata_size = avctx->extradata_size;
320  }
321 
322  if (extradata_size < 19) {
323  av_log(avctx, AV_LOG_ERROR, "Invalid extradata size: %d\n",
324  extradata_size);
325  return AVERROR_INVALIDDATA;
326  }
327 
328  version = extradata[8];
329  if (version > 15) {
330  avpriv_request_sample(avctx, "Extradata version %d", version);
331  return AVERROR_PATCHWELCOME;
332  }
333 
334  avctx->delay = AV_RL16(extradata + 10);
335  if (avctx->internal)
336  avctx->internal->skip_samples = avctx->delay;
337 
338  channels = avctx->extradata ? extradata[9] : (channels == 1) ? 1 : 2;
339  if (!channels) {
340  av_log(avctx, AV_LOG_ERROR, "Zero channel count specified in the extradata\n");
341  return AVERROR_INVALIDDATA;
342  }
343 
344  s->gain_i = AV_RL16(extradata + 16);
345 
346  map_type = extradata[18];
347  if (!map_type) {
348  if (channels > 2) {
349  av_log(avctx, AV_LOG_ERROR,
350  "Channel mapping 0 is only specified for up to 2 channels\n");
352  goto fail;
353  }
356  streams = 1;
357  stereo_streams = channels - 1;
358  channel_map = default_channel_map;
359  } else if (map_type == 1 || map_type == 2 || map_type == 255) {
360  if (extradata_size < 21 + channels) {
361  av_log(avctx, AV_LOG_ERROR, "Invalid extradata size: %d\n",
362  extradata_size);
364  goto fail;
365  }
366 
367  streams = extradata[19];
368  stereo_streams = extradata[20];
369  if (!streams || stereo_streams > streams ||
370  streams + stereo_streams > 255) {
371  av_log(avctx, AV_LOG_ERROR,
372  "Invalid stream/stereo stream count: %d/%d\n", streams, stereo_streams);
374  goto fail;
375  }
376 
377  if (map_type == 1) {
378  if (channels > 8) {
379  av_log(avctx, AV_LOG_ERROR,
380  "Channel mapping 1 is only specified for up to 8 channels\n");
382  goto fail;
383  }
385  channel_reorder = channel_reorder_vorbis;
386  } else if (map_type == 2) {
387  int ambisonic_order = ff_sqrt(channels) - 1;
388  if (channels != ((ambisonic_order + 1) * (ambisonic_order + 1)) &&
389  channels != ((ambisonic_order + 1) * (ambisonic_order + 1) + 2)) {
390  av_log(avctx, AV_LOG_ERROR,
391  "Channel mapping 2 is only specified for channel counts"
392  " which can be written as (n + 1)^2 or (n + 1)^2 + 2"
393  " for nonnegative integer n\n");
395  goto fail;
396  }
397  if (channels > 227) {
398  av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
400  goto fail;
401  }
402 
404  layout.nb_channels = channels;
405  if (channels != ((ambisonic_order + 1) * (ambisonic_order + 1)))
406  layout.u.mask = AV_CH_LAYOUT_STEREO;
407  } else {
409  layout.nb_channels = channels;
410  }
411 
412  channel_map = extradata + 21;
413  } else {
414  avpriv_request_sample(avctx, "Mapping type %d", map_type);
415  return AVERROR_PATCHWELCOME;
416  }
417 
418  s->channel_maps = av_calloc(channels, sizeof(*s->channel_maps));
419  if (!s->channel_maps) {
420  ret = AVERROR(ENOMEM);
421  goto fail;
422  }
423 
424  for (i = 0; i < channels; i++) {
425  ChannelMap *map = &s->channel_maps[i];
426  uint8_t idx = channel_map[channel_reorder(channels, i)];
427 
428  if (idx == 255) {
429  map->silence = 1;
430  continue;
431  } else if (idx >= streams + stereo_streams) {
432  av_log(avctx, AV_LOG_ERROR,
433  "Invalid channel map for output channel %d: %d\n", i, idx);
434  av_freep(&s->channel_maps);
436  goto fail;
437  }
438 
439  /* check that we did not see this index yet */
440  map->copy = 0;
441  for (j = 0; j < i; j++)
442  if (channel_map[channel_reorder(channels, j)] == idx) {
443  map->copy = 1;
444  map->copy_idx = j;
445  break;
446  }
447 
448  if (idx < 2 * stereo_streams) {
449  map->stream_idx = idx / 2;
450  map->channel_idx = idx & 1;
451  } else {
452  map->stream_idx = idx - stereo_streams;
453  map->channel_idx = 0;
454  }
455  }
456 
458  if (ret < 0)
459  goto fail;
460 
461  s->nb_streams = streams;
462  s->nb_stereo_streams = stereo_streams;
463 
464  return 0;
465 fail:
467  return ret;
468 }
469 
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
xiph_lacing_full
static int xiph_lacing_full(const uint8_t **ptr, const uint8_t *end)
Read a multi-byte length (used for code 3 packet padding size)
Definition: opus_parse.c:74
AV_CHANNEL_LAYOUT_STEREO
#define AV_CHANNEL_LAYOUT_STEREO
Definition: channel_layout.h:387
channel_reorder_vorbis
static int channel_reorder_vorbis(int nb_channels, int channel_idx)
Definition: opus_parse.c:286
opus_default_extradata
static const uint8_t opus_default_extradata[30]
Definition: opus.h:35
AVCodecInternal::skip_samples
int skip_samples
Number of audio samples to skip at the start of the next decoded frame.
Definition: internal.h:125
OPUS_MAX_FRAME_SIZE
#define OPUS_MAX_FRAME_SIZE
Definition: opus.h:28
internal.h
opus.h
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:321
vorbis_data.h
AVCodecContext::delay
int delay
Codec delay.
Definition: avcodec.h:601
opus_parse.h
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:1065
fail
#define fail()
Definition: checkasm.h:188
val
static double val(void *priv, double ch)
Definition: aeval.c:77
ff_sqrt
#define ff_sqrt
Definition: mathops.h:216
AV_CH_LAYOUT_STEREO
#define AV_CH_LAYOUT_STEREO
Definition: channel_layout.h:213
pkt
AVPacket * pkt
Definition: movenc.c:60
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
AVCodecContext::extradata_size
int extradata_size
Definition: avcodec.h:524
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CHANNEL_ORDER_UNSPEC
@ AV_CHANNEL_ORDER_UNSPEC
Only the channel count is specified, without any further information about the channel order.
Definition: channel_layout.h:116
channels
channels
Definition: aptx.h:31
xiph_lacing_16bit
static int xiph_lacing_16bit(const uint8_t **ptr, const uint8_t *end)
Read a 1- or 2-byte frame length.
Definition: opus_parse.c:56
AV_RL16
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_WL32 unsigned int_TMPL AV_WL24 unsigned int_TMPL AV_RL16
Definition: bytestream.h:94
channel_map
static const uint8_t channel_map[8][8]
Definition: atrac3plusdec.c:52
OPUS_MODE_CELT
@ OPUS_MODE_CELT
Definition: opus.h:44
AV_CHANNEL_ORDER_AMBISONIC
@ AV_CHANNEL_ORDER_AMBISONIC
The audio is represented as the decomposition of the sound field into spherical harmonics.
Definition: channel_layout.h:152
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
AVCodecContext::internal
struct AVCodecInternal * internal
Private context used for internal data.
Definition: avcodec.h:480
mathops.h
OPUS_MAX_PACKET_DUR
#define OPUS_MAX_PACKET_DUR
Definition: opus.h:30
OpusParseContext
Definition: opus_parse.h:63
OPUS_BANDWIDTH_SUPERWIDEBAND
@ OPUS_BANDWIDTH_SUPERWIDEBAND
Definition: opus.h:53
error.h
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:311
attributes.h
version
version
Definition: libkvazaar.c:321
ff_vorbis_channel_layout_offsets
const uint8_t ff_vorbis_channel_layout_offsets[8][8]
Definition: vorbis_data.c:26
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
log.h
OPUS_MAX_FRAMES
#define OPUS_MAX_FRAMES
Definition: opus.h:29
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AVCodecContext::extradata
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:523
channel_reorder_unknown
static int channel_reorder_unknown(int nb_channels, int channel_idx)
Definition: opus_parse.c:291
OPUS_MODE_HYBRID
@ OPUS_MODE_HYBRID
Definition: opus.h:43
len
int len
Definition: vorbis_enc_data.h:426
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
avcodec.h
ret
ret
Definition: filter_design.txt:187
AVCodecContext
main external API structure.
Definition: avcodec.h:445
channel_layout.h
opus_frame_duration
static const uint16_t opus_frame_duration[32]
Definition: opus_parse.c:41
OPUS_MODE_SILK
@ OPUS_MODE_SILK
Definition: opus.h:42
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:437
OpusPacket
Definition: opus_parse.h:31
ChannelMap
Definition: opus_parse.h:48
av_channel_layout_copy
int av_channel_layout_copy(AVChannelLayout *dst, const AVChannelLayout *src)
Make a copy of a channel layout.
Definition: channel_layout.c:444
mem.h
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:36
ff_vorbis_ch_layouts
const AVChannelLayout ff_vorbis_ch_layouts[9]
Definition: vorbis_data.c:37
map
const VDPAUPixFmtMap * map
Definition: hwcontext_vdpau.c:71
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:386
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
ff_opus_parse_extradata
av_cold int ff_opus_parse_extradata(AVCodecContext *avctx, OpusParseContext *s)
Definition: opus_parse.c:296
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
int
int
Definition: ffmpeg_filter.c:424
ff_opus_parse_packet
int ff_opus_parse_packet(OpusPacket *pkt, const uint8_t *buf, int buf_size, int self_delimiting)
Parse Opus packet info from raw packet data.
Definition: opus_parse.c:95