[FFmpeg-devel] [PATCH 1/3] sbc: implement SBC codec (low-complexity subband codec)

Aurelien Jacobs aurel at gnuage.org
Mon Nov 6 01:35:18 EET 2017


This was originally based on libsbc, and was fully integrated into ffmpeg.
---
 doc/general.texi                 |   2 +
 libavcodec/Makefile              |   4 +
 libavcodec/allcodecs.c           |   2 +
 libavcodec/arm/Makefile          |   3 +
 libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
 libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
 libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
 libavcodec/avcodec.h             |   2 +
 libavcodec/codec_desc.c          |  12 +
 libavcodec/sbc.c                 | 316 +++++++++++++++++
 libavcodec/sbc.h                 | 121 +++++++
 libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
 libavcodec/sbcdec_data.c         | 127 +++++++
 libavcodec/sbcdec_data.h         |  44 +++
 libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
 libavcodec/sbcdsp.h              |  86 +++++
 libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
 libavcodec/sbcdsp_data.h         |  57 ++++
 libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
 libavcodec/x86/Makefile          |   2 +
 libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
 libavcodec/x86/sbcdsp_init.c     |  51 +++
 22 files changed, 4017 insertions(+)
 create mode 100644 libavcodec/arm/sbcdsp_armv6.S
 create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
 create mode 100644 libavcodec/arm/sbcdsp_neon.S
 create mode 100644 libavcodec/sbc.c
 create mode 100644 libavcodec/sbc.h
 create mode 100644 libavcodec/sbcdec.c
 create mode 100644 libavcodec/sbcdec_data.c
 create mode 100644 libavcodec/sbcdec_data.h
 create mode 100644 libavcodec/sbcdsp.c
 create mode 100644 libavcodec/sbcdsp.h
 create mode 100644 libavcodec/sbcdsp_data.c
 create mode 100644 libavcodec/sbcdsp_data.h
 create mode 100644 libavcodec/sbcenc.c
 create mode 100644 libavcodec/x86/sbcdsp.asm
 create mode 100644 libavcodec/x86/sbcdsp_init.c

diff --git a/doc/general.texi b/doc/general.texi
index 9e6ae13435..baaa308dcf 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -1096,6 +1096,8 @@ following image formats are supported:
     @tab Real low bitrate AC-3 codec
 @item RealAudio Lossless     @tab     @tab  X
 @item RealAudio SIPR / ACELP.NET @tab     @tab  X
+@item SBC (low-complexity subband codec) @tab  X  @tab  X
+    @tab Used in Bluetooth A2DP
 @item Shorten                @tab     @tab  X
 @item Sierra VMD audio       @tab     @tab  X
     @tab Used in Sierra VMD files.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3a33361f33..17648a1c3d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -576,6 +576,10 @@ OBJS-$(CONFIG_SUBVIEWER_DECODER)       += subviewerdec.o ass.o
 OBJS-$(CONFIG_SUNRAST_DECODER)         += sunrast.o
 OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_LIBRSVG_DECODER)         += librsvgdec.o
+OBJS-$(CONFIG_SBC_DECODER)             += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_SBC_ENCODER)             += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
+OBJS-$(CONFIG_MSBC_DECODER)            += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_MSBC_ENCODER)            += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o svq13.o h263data.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
                                           h263.o ituh263enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 98655ddd7c..95cf67ce20 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -452,6 +452,7 @@ static void register_all(void)
     REGISTER_DECODER(MP3ON4FLOAT,       mp3on4float);
     REGISTER_DECODER(MPC7,              mpc7);
     REGISTER_DECODER(MPC8,              mpc8);
+    REGISTER_ENCDEC (MSBC,              msbc);
     REGISTER_ENCDEC (NELLYMOSER,        nellymoser);
     REGISTER_DECODER(ON2AVC,            on2avc);
     REGISTER_ENCDEC (OPUS,              opus);
@@ -465,6 +466,7 @@ static void register_all(void)
     REGISTER_DECODER(SHORTEN,           shorten);
     REGISTER_DECODER(SIPR,              sipr);
     REGISTER_DECODER(SMACKAUD,          smackaud);
+    REGISTER_ENCDEC (SBC,               sbc);
     REGISTER_ENCDEC (SONIC,             sonic);
     REGISTER_ENCODER(SONIC_LS,          sonic_ls);
     REGISTER_DECODER(TAK,               tak);
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 1eeac5449e..fd2401f4e5 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -42,6 +42,7 @@ OBJS-$(CONFIG_DCA_DECODER)             += arm/synth_filter_init_arm.o
 OBJS-$(CONFIG_HEVC_DECODER)            += arm/hevcdsp_init_arm.o
 OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
 OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
+OBJS-$(CONFIG_SBC_ENCODER)             += arm/sbcdsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
 OBJS-$(CONFIG_VP6_DECODER)             += arm/vp6dsp_init_arm.o
 OBJS-$(CONFIG_VP9_DECODER)             += arm/vp9dsp_init_10bpp_arm.o   \
@@ -81,6 +82,7 @@ ARMV6-OBJS-$(CONFIG_VP8DSP)            += arm/vp8_armv6.o               \
 
 # decoders/encoders
 ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
+ARMV6-OBJS-$(CONFIG_SBC_ENCODER)       += arm/sbcdsp_armv6.o
 
 
 # VFP optimizations
@@ -140,6 +142,7 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
 NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
 NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
                                           arm/rv40dsp_neon.o
+NEON-OBJS-$(CONFIG_SBC_ENCODER)        += arm/sbcdsp_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
 NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp6dsp_neon.o
 NEON-OBJS-$(CONFIG_VP9_DECODER)        += arm/vp9itxfm_16bpp_neon.o     \
diff --git a/libavcodec/arm/sbcdsp_armv6.S b/libavcodec/arm/sbcdsp_armv6.S
new file mode 100644
index 0000000000..f1ff845798
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_armv6.S
@@ -0,0 +1,245 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_sbc_analyze_4_armv6, export=1
+        @ r0 = in (int16), r1 = out (4 x int32), r2 = consts (int16)
+        push            {r1, r3-r7, lr}
+        push            {r8-r12, r14}            @ r14 (lr) doubles as scratch below
+        ldrd            r4,  r5,  [r0, #0]
+        ldrd            r6,  r7,  [r2, #0]
+        ldrd            r8,  r9,  [r0, #16]
+        ldrd            r10, r11, [r2, #16]
+        mov             r14, #0x8000             @ 1 << 15: rounds the later >> 16
+        smlad           r3,  r4,  r6,  r14       @ acc = bias + two 16x16 products
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #32]
+        ldrd            r6,  r7,  [r2, #32]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #48]
+        ldrd            r10, r11, [r2, #48]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #64]
+        ldrd            r6,  r7,  [r2, #64]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #8]
+        ldrd            r10, r11, [r2, #8]
+        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
+        smlad           r12, r5,  r7,  r12       @ t1[1] is done
+        ldrd            r4,  r5,  [r0, #24]
+        ldrd            r6,  r7,  [r2, #24]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
+        smlad           r12, r8,  r10, r14
+        smlad           r14, r9,  r11, r14       @ last use of the bias, r14 becomes an accumulator
+        ldrd            r8,  r9,  [r0, #40]
+        ldrd            r10, r11, [r2, #40]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #56]
+        ldrd            r6,  r7,  [r2, #56]
+        smlad           r12, r8,  r10, r12
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #72]
+        ldrd            r10, r11, [r2, #72]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r2, #80]      @ start loading cos table
+        smlad           r12, r8,  r10, r12       @ t1[2] is done
+        smlad           r14, r9,  r11, r14       @ t1[3] is done
+        ldrd            r6,  r7,  [r2, #88]
+        ldrd            r8,  r9,  [r2, #96]
+        ldrd            r10, r11, [r2, #104]     @ cos table fully loaded
+        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
+        smuad           r4,  r3,  r4             @ packed t1[0:1] times cos table, no accumulator
+        smuad           r5,  r3,  r5
+        smlad           r4,  r12, r8,  r4        @ add packed t1[2:3] contribution
+        smlad           r5,  r12, r9,  r5
+        smuad           r6,  r3,  r6
+        smuad           r7,  r3,  r7
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        pop             {r8-r12, r14}
+        stmia           r1, {r4, r5, r6, r7}     @ write the four results to out
+        pop             {r1, r3-r7, pc}          @ restore and return (saved lr -> pc)
+endfunc
+
+function ff_sbc_analyze_8_armv6, export=1
+        @ r0 = in (int16), r1 = out (8 x int32), r2 = consts (int16)
+        push            {r1, r3-r7, lr}
+        push            {r8-r12, r14}            @ r14 (lr) doubles as scratch below
+        ldrd            r4,  r5,  [r0, #24]
+        ldrd            r6,  r7,  [r2, #24]
+        ldrd            r8,  r9,  [r0, #56]
+        ldrd            r10, r11, [r2, #56]
+        mov             r14, #0x8000             @ 1 << 15: rounds the later >> 16
+        smlad           r3,  r4,  r6,  r14       @ acc = bias + two 16x16 products
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #88]
+        ldrd            r6,  r7,  [r2, #88]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #120]
+        ldrd            r10, r11, [r2, #120]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #152]
+        ldrd            r6,  r7,  [r2, #152]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #16]
+        ldrd            r10, r11, [r2, #16]
+        smlad           r3,  r4,  r6,  r3        @ t1[6] is done
+        smlad           r12, r5,  r7,  r12       @ t1[7] is done
+        ldrd            r4,  r5,  [r0, #48]
+        ldrd            r6,  r7,  [r2, #48]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[6] and t1[7]
+        str             r3,  [sp, #-4]!          @ save to stack
+        smlad           r3,  r8,  r10, r14
+        smlad           r12, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #80]
+        ldrd            r10, r11, [r2, #80]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #112]
+        ldrd            r6,  r7,  [r2, #112]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #144]
+        ldrd            r10, r11, [r2, #144]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #0]
+        ldrd            r6,  r7,  [r2, #0]
+        smlad           r3,  r8,  r10, r3        @ t1[4] is done
+        smlad           r12, r9,  r11, r12       @ t1[5] is done
+        ldrd            r8,  r9,  [r0, #32]
+        ldrd            r10, r11, [r2, #32]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[4] and t1[5]
+        str             r3,  [sp, #-4]!          @ save to stack
+        smlad           r3,  r4,  r6,  r14
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #64]
+        ldrd            r6,  r7,  [r2, #64]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #96]
+        ldrd            r10, r11, [r2, #96]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #128]
+        ldrd            r6,  r7,  [r2, #128]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #8]
+        ldrd            r10, r11, [r2, #8]
+        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
+        smlad           r12, r5,  r7,  r12       @ t1[1] is done
+        ldrd            r4,  r5,  [r0, #40]
+        ldrd            r6,  r7,  [r2, #40]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
+        smlad           r12, r8,  r10, r14
+        smlad           r14, r9,  r11, r14       @ last use of the bias, r14 becomes an accumulator
+        ldrd            r8,  r9,  [r0, #72]
+        ldrd            r10, r11, [r2, #72]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #104]
+        ldrd            r6,  r7,  [r2, #104]
+        smlad           r12, r8,  r10, r12
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #136]
+        ldrd            r10, r11, [r2, #136]!    @ writeback: r2 += 136
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r2, #(160 - 136 + 0)] @ offsets are now relative to consts + 136
+        smlad           r12, r8,  r10, r12       @ t1[2] is done
+        smlad           r14, r9,  r11, r14       @ t1[3] is done
+        ldrd            r6,  r7,  [r2, #(160 - 136 + 8)]
+        smuad           r4,  r3,  r4
+        smuad           r5,  r3,  r5
+        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
+                                                 @ r3  = t2[0:1]
+                                                 @ r12 = t2[2:3]
+        pop             {r0, r14}                @ t2[4:5], t2[6:7]
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 32)]
+        smuad           r6,  r3,  r6
+        smuad           r7,  r3,  r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 40)]
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 64)]
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 72)]
+        smlad           r4,  r0,  r8,  r4
+        smlad           r5,  r0,  r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 96)]
+        smlad           r6,  r0,  r10, r6
+        smlad           r7,  r0,  r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 104)]
+        smlad           r4,  r14, r8,  r4
+        smlad           r5,  r14, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 0)]
+        smlad           r6,  r14, r10, r6
+        smlad           r7,  r14, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 8)]
+        stmia           r1!, {r4, r5}            @ out[0..1], r1 advances
+        smuad           r4,  r3,  r8
+        smuad           r5,  r3,  r9
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 32)]
+        stmia           r1!, {r6, r7}            @ out[2..3]
+        smuad           r6,  r3,  r10
+        smuad           r7,  r3,  r11
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 40)]
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 64)]
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 72)]
+        smlad           r4,  r0,  r8,  r4
+        smlad           r5,  r0,  r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 96)]
+        smlad           r6,  r0,  r10, r6
+        smlad           r7,  r0,  r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 104)]
+        smlad           r4,  r14, r8,  r4
+        smlad           r5,  r14, r9,  r5
+        smlad           r6,  r14, r10, r6
+        smlad           r7,  r14, r11, r7
+        pop             {r8-r12, r14}
+        stmia           r1!, {r4, r5, r6, r7}    @ out[4..7]
+        pop             {r1, r3-r7, pc}          @ restore r1 and return (saved lr -> pc)
+endfunc
diff --git a/libavcodec/arm/sbcdsp_init_arm.c b/libavcodec/arm/sbcdsp_init_arm.c
new file mode 100644
index 0000000000..6bf7e729ef
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_init_arm.c
@@ -0,0 +1,105 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 and NEON optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_armv6(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_armv6(const int16_t *in, int32_t *out, const int16_t *consts);
+/* NEON entry points; ff_sbcdsp_init_arm() installs them when have_neon() reports support. */
+void ff_sbc_analyze_4_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_neon(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int channels, int subbands);
+int ff_sbc_calc_scalefactors_j_neon(int32_t sb_sample_f[16][2][8],
+                                    uint32_t scale_factor[2][8],
+                                    int blocks, int subbands);
+int ff_sbc_enc_process_input_4s_neon(int position, const uint8_t *pcm,
+                                     int16_t X[2][SBC_X_BUFFER_SIZE],
+                                     int nsamples, int nchannels);
+int ff_sbc_enc_process_input_8s_neon(int position, const uint8_t *pcm,
+                                     int16_t X[2][SBC_X_BUFFER_SIZE],
+                                     int nsamples, int nchannels);
+/* Eight distinct single-bit values. NOTE(review): presumably read by the NEON joint-stereo code - confirm. */
+DECLARE_ALIGNED(SBC_ALIGN, int32_t, ff_sbcdsp_joint_bits_mask)[8] = {
+    8,   4,  2,  1, 128, 64, 32, 16
+};
+/* PERM(a, b, c, d): for each argument n emit the byte pair {2n, 2n + 1}; the pair is swapped on big-endian. */
+#if HAVE_BIGENDIAN
+#define PERM(a, b, c, d) {        \
+        (a * 2) + 1, (a * 2) + 0, \
+        (b * 2) + 1, (b * 2) + 0, \
+        (c * 2) + 1, (c * 2) + 0, \
+        (d * 2) + 1, (d * 2) + 0  \
+    }
+#else
+#define PERM(a, b, c, d) {        \
+        (a * 2) + 0, (a * 2) + 1, \
+        (b * 2) + 0, (b * 2) + 1, \
+        (c * 2) + 0, (c * 2) + 1, \
+        (d * 2) + 0, (d * 2) + 1  \
+    }
+#endif
+/* Byte-index tables built with PERM(). NOTE(review): presumably used by the NEON process_input routines - confirm. */
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_4)[2][8] = {
+    PERM(7, 3, 6, 4),
+    PERM(0, 2, 1, 5)
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_8)[4][8] = {
+    PERM(15, 7, 14,  8),
+    PERM(13, 9, 12, 10),
+    PERM(11, 3,  6,  0),
+    PERM( 5, 1,  4,  2)
+};
+/* Fill the function pointers of s with ARMv6/NEON routines, chosen from av_get_cpu_flags(). */
+av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_armv6(cpu_flags)) {
+        s->sbc_analyze_4 = ff_sbc_analyze_4_armv6;
+        s->sbc_analyze_8 = ff_sbc_analyze_8_armv6;
+    }
+    /* Tested second, so the NEON analyze routines replace the ARMv6 ones when both are available. */
+    if (have_neon(cpu_flags)) {
+        s->sbc_analyze_4 = ff_sbc_analyze_4_neon;
+        s->sbc_analyze_8 = ff_sbc_analyze_8_neon;
+        s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_neon;
+        s->sbc_calc_scalefactors_j = ff_sbc_calc_scalefactors_j_neon;
+        if (s->increment != 1) { /* NEON input routines are skipped when increment == 1 (presumably mSBC - confirm) */
+            s->sbc_enc_process_input_4s = ff_sbc_enc_process_input_4s_neon;
+            s->sbc_enc_process_input_8s = ff_sbc_enc_process_input_8s_neon;
+        }
+    }
+}
diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
new file mode 100644
index 0000000000..d83d21d202
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_neon.S
@@ -0,0 +1,714 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARM NEON optimizations
+ */
+
+#include "libavutil/arm/asm.S"
+#include "neon.S"
+/* Rounding right-shift applied to the first-stage sums of the analyze functions in this file. */
+#define SBC_PROTO_FIXED_SCALE 16
+
+function ff_sbc_analyze_4_neon, export=1    /* r0 = in, r1 = out, r2 = consts */
+        /* TODO: merge even and odd cases (or even merge all four calls to this
+         * function) in order to have only aligned reads from 'in' array
+         * and reduce number of load instructions */
+        vld1.16         {d4, d5}, [r0, :64]!
+        vld1.16         {d16, d17}, [r2, :128]!
+        /* Only q0-q3 and q8-q9 are used: q4-q7 (d8-d15) are callee-saved under AAPCS. */
+        vmull.s16       q0, d4, d16
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmull.s16       q1, d5, d17
+        vld1.16         {d18, d19}, [r2, :128]!
+
+        vmlal.s16       q0, d6, d18
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q1, d7, d19
+        vld1.16         {d16, d17}, [r2, :128]!
+
+        vmlal.s16       q0, d4, d16
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q1, d5, d17
+        vld1.16         {d18, d19}, [r2, :128]!
+
+        vmlal.s16       q0, d6, d18
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q1, d7, d19
+        vld1.16         {d16, d17}, [r2, :128]!
+
+        vmlal.s16       q0, d4, d16
+        vmlal.s16       q1, d5, d17
+        /* 5 x 8 int16 products accumulated; pairwise-add the 8 partial sums down to 4. */
+        vpadd.s32       d0, d0, d1
+        vpadd.s32       d1, d2, d3
+        /* Round, shift right by SBC_PROTO_FIXED_SCALE and narrow to 4 x int16 in d0. */
+        vrshrn.s32      d0, q0, SBC_PROTO_FIXED_SCALE
+        /* Next 16 int16 coefficients from consts. */
+        vld1.16         {d2, d3, d4, d5}, [r2, :128]!
+
+        vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+        vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+        vmull.s16       q3, d2, d0
+        vmull.s16       q8, d3, d0
+        vmlal.s16       q3, d4, d1
+        vmlal.s16       q8, d5, d1
+
+        vpadd.s32       d0, d6, d7 /* TODO: can be eliminated */
+        vpadd.s32       d1, d16, d17 /* TODO: can be eliminated */
+        /* Store the four int32 results to out. */
+        vst1.32         {d0, d1}, [r1, :128]
+
+        bx              lr
+endfunc
+
+function ff_sbc_analyze_8_neon, export=1    /* r0 = in, r1 = out, r2 = consts */
+        /* TODO: merge even and odd cases (or even merge all four calls to this
+         * function) in order to have only aligned reads from 'in' array
+         * and reduce number of load instructions */
+        vld1.16         {d4, d5}, [r0, :64]!
+        vld1.16         {d20, d21}, [r2, :128]!
+        /* Only q0-q3 and q8-q13 are used: q4-q7 (d8-d15) are callee-saved under AAPCS. */
+        vmull.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmull.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmull.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmull.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmlal.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmlal.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmlal.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+
+        vmlal.s16       q8, d6, d22
+        vmlal.s16       q9, d7, d23
+        /* 10 x 8 int16 products accumulated; pairwise-add the 16 partial sums down to 8. */
+        vpadd.s32       d0, d24, d25
+        vpadd.s32       d1, d26, d27
+        vpadd.s32       d2, d16, d17
+        vpadd.s32       d3, d18, d19
+        /* Round, shift right by SBC_PROTO_FIXED_SCALE and narrow to 8 x int16 in d0/d1. */
+        vrshr.s32       q0, q0, SBC_PROTO_FIXED_SCALE
+        vrshr.s32       q1, q1, SBC_PROTO_FIXED_SCALE
+        vmovn.s32       d0, q0
+        vmovn.s32       d1, q1
+
+        vdup.i32        d3, d1[1]  /* TODO: can be eliminated */
+        vdup.i32        d2, d1[0]  /* TODO: can be eliminated */
+        vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+        vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+        /* Second stage: 8 further loads of 8 int16 coefficients each from consts. */
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmull.s16       q12, d4, d0
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmull.s16       q13, d5, d0
+        vmull.s16       q8, d6, d0
+        vmull.s16       q9, d7, d0
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q12, d4, d1
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q13, d5, d1
+        vmlal.s16       q8, d6, d1
+        vmlal.s16       q9, d7, d1
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q12, d4, d2
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q13, d5, d2
+        vmlal.s16       q8, d6, d2
+        vmlal.s16       q9, d7, d2
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q12, d4, d3
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q13, d5, d3
+        vmlal.s16       q8, d6, d3
+        vmlal.s16       q9, d7, d3
+
+        vpadd.s32       d0, d24, d25 /* TODO: can be eliminated */
+        vpadd.s32       d1, d26, d27 /* TODO: can be eliminated */
+        vpadd.s32       d2, d16, d17 /* TODO: can be eliminated */
+        vpadd.s32       d3, d18, d19 /* TODO: can be eliminated */
+        /* Store the eight int32 results to out. */
+        vst1.32         {d0, d1, d2, d3}, [r1, :128]
+
+        bx              lr
+endfunc
+
+function ff_sbc_calc_scalefactors_neon, export=1
+        @ parameters
+        @ r0 = sb_sample_f
+        @ r1 = scale_factor
+        @ r2 = blocks
+        @ r3 = channels
+        @ r4 = subbands (fifth argument, loaded from the stack below)
+        @ local variables
+        @ r5 = in_loop_1
+        @ r6 = in
+        @ r7 = out_loop_1
+        @ r8 = out
+        @ r9 = ch
+        @ r10 = sb
+        @ r11 = inc
+        @ r12 = blk
+
+        push            {r1-r2, r4-r12}         @ 11 registers = 44 bytes
+        ldr             r4,  [sp, #44]          @ first stack argument, just above the saved registers
+        mov             r11, #64                @ byte step between consecutive loads from in
+
+        mov             r9,  #0
+1:
+        add             r5,  r0,  r9, lsl#5     @ in_loop_1  = sb_sample_f  + ch * 32 bytes
+        add             r7,  r1,  r9, lsl#5     @ out_loop_1 = scale_factor + ch * 32 bytes
+
+        mov             r10,  #0
+2:
+        add             r6,  r5,  r10, lsl#2    @ in  = in_loop_1  + sb * 4 bytes
+        add             r8,  r7,  r10, lsl#2    @ out = out_loop_1 + sb * 4 bytes
+        mov             r12, r2                 @ blk = blocks
+
+        vmov.s32        q0,  #0
+        vmov.s32        q1,  #0x8000            @ 1 << SCALE_OUT_BITS
+        vmov.s32        q14, #1
+        vmov.s32        q15, #16                @ 31 - SCALE_OUT_BITS
+        vadd.s32        q1,  q1,  q14           @ q1 = (1 << SCALE_OUT_BITS) + 1
+3:
+        vld1.32         {d16, d17}, [r6, :128], r11
+        vabs.s32        q8,  q8
+        vld1.32         {d18, d19}, [r6, :128], r11
+        vabs.s32        q9,  q9
+        vld1.32         {d20, d21}, [r6, :128], r11
+        vabs.s32        q10, q10
+        vld1.32         {d22, d23}, [r6, :128], r11
+        vabs.s32        q11, q11
+        vmax.s32        q0,  q0,  q8            @ running maxima of |sample| in q0 and q1
+        vmax.s32        q1,  q1,  q9
+        vmax.s32        q0,  q0,  q10
+        vmax.s32        q1,  q1,  q11
+        subs            r12, r12, #4            @ four loads (one per step of r11) per iteration
+        bgt             3b
+        vmax.s32        q0,  q0,  q1
+        vsub.s32        q0,  q0,  q14
+        vclz.s32        q0,  q0
+        vsub.s32        q0,  q15, q0            @ result = 16 - clz(max - 1)
+        vst1.32         {d0, d1}, [r8, :128]    @ four results written to out
+
+        add             r10, r10, #4            @ four subbands handled per pass
+        cmp             r10, r4
+        blt             2b
+
+        add             r9,  r9,  #1
+        cmp             r9,  r3
+        blt             1b
+
+        pop             {r1-r2, r4-r12}
+        bx              lr
+endfunc
+
+/*
+ * constants: q13 = (31 - SCALE_OUT_BITS)
+ *            q14 = 1
+ * input:     q0  - ((1 << SCALE_OUT_BITS) + 1)
+ *            r5  - samples for channel 0
+ *            r6  - samples for channel 1
+ * output:    q0, q1 - scale factors without joint stereo
+ *            q2, q3 - scale factors with joint stereo
+ *            q15    - joint stereo selection mask
+ */
+.macro calc_scalefactors
+        vmov.s32        q1,  q0                 @ q1-q3 start from the same seed as q0
+        vmov.s32        q2,  q0
+        vmov.s32        q3,  q0
+        mov             r3,  r2                 @ r3 = loop count, copied from r2
+1:
+        vld1.32         {d18, d19}, [r6, :128], r11 @ q9 = four values from r6, r6 += r11
+        vbic.s32        q11, q9,  q14           @ q11 = q9 & ~q14
+        vld1.32         {d16, d17}, [r5, :128], r11 @ q8 = four values from r5, r5 += r11
+        vhadd.s32       q10, q8,  q11           @ q10 = (q8 + q11) >> 1
+        vhsub.s32       q11, q8,  q11           @ q11 = (q8 - q11) >> 1
+        vabs.s32        q8,  q8
+        vabs.s32        q9,  q9
+        vabs.s32        q10, q10
+        vabs.s32        q11, q11
+        vmax.s32        q0,  q0,  q8            @ running maxima: q0/q1 plain samples,
+        vmax.s32        q1,  q1,  q9
+        vmax.s32        q2,  q2,  q10           @ q2/q3 half-sum and half-difference
+        vmax.s32        q3,  q3,  q11
+        subs            r3,  r3,  #1
+        bgt             1b
+        vsub.s32        q0,  q0,  q14           @ max - q14
+        vsub.s32        q1,  q1,  q14
+        vsub.s32        q2,  q2,  q14
+        vsub.s32        q3,  q3,  q14
+        vclz.s32        q0,  q0
+        vclz.s32        q1,  q1
+        vclz.s32        q2,  q2
+        vclz.s32        q3,  q3
+        vsub.s32        q0,  q13, q0            @ result = q13 - clz(max - q14)
+        vsub.s32        q1,  q13, q1
+        vsub.s32        q2,  q13, q2
+        vsub.s32        q3,  q13, q3
+.endm
+
+/*
+ * constants: q14 = 1, r10 = 0
+ * input: q15 - joint stereo selection mask
+ *        r5, r6 - values left by calc_scalefactors macro
+ *        r2 - blocks; two are handled per pass (assumes even, >= 4 - TODO confirm)
+ */
+.macro update_joint_stereo_samples
+        sub             r8,  r6,  r11            @ r8 = channel 1, one step back
+        sub             r7,  r5,  r11            @ r7 = channel 0, one step back
+        sub             r6,  r6,  r11, asl #1    @ r6 = channel 1, two steps back
+        sub             r5,  r5,  r11, asl #1    @ r5 = channel 0, two steps back
+        vld1.32         {d18, d19}, [r6, :128]
+        vbic.s32        q11, q9,  q14            @ channel 1 with bit 0 cleared
+        vld1.32         {d16, d17}, [r5, :128]
+        vld1.32         {d2, d3}, [r8, :128]
+        vbic.s32        q3,  q1,  q14
+        vld1.32         {d0, d1}, [r7, :128]
+        vhsub.s32       q10, q8,  q11            @ halved difference, stored via r6
+        vhadd.s32       q11, q8,  q11            @ halved sum, stored via r5
+        vhsub.s32       q2,  q0,  q3             @ same pair for the r8 / r7 block
+        vhadd.s32       q3,  q0,  q3
+        vbif.s32        q10, q9,  q15            @ mask clear: keep the loaded sample
+        vbif.s32        d22, d16, d30
+        sub             r11, r10, r11, asl #1    @ r11 = 0 - 2 * r11: walk backwards
+        sub             r3,  r2,  #2             @ last pair is stored after the loop
+2:
+        vbif.s32        d23, d17, d31
+        vst1.32         {d20, d21}, [r6, :128], r11
+        vbif.s32        d4,  d2,  d30
+        vld1.32         {d18, d19}, [r6, :128]
+        vbif.s32        d5,  d3,  d31
+        vst1.32         {d22, d23}, [r5, :128], r11
+        vbif.s32        d6,  d0,  d30
+        vld1.32         {d16, d17}, [r5, :128]
+        vbif.s32        d7,  d1,  d31
+        vst1.32         {d4, d5}, [r8, :128], r11
+        vbic.s32        q11, q9,  q14
+        vld1.32         {d2, d3}, [r8, :128]
+        vst1.32         {d6, d7}, [r7, :128], r11
+        vbic.s32        q3,  q1,  q14
+        vld1.32         {d0, d1}, [r7, :128]
+        vhsub.s32       q10, q8,  q11
+        vhadd.s32       q11, q8,  q11
+        vhsub.s32       q2,  q0,  q3
+        vhadd.s32       q3,  q0,  q3
+        vbif.s32        q10, q9,  q15
+        vbif.s32        d22, d16, d30
+        subs            r3,  r3,  #2
+        bgt             2b
+        sub             r11, r10, r11, asr #1    @ undo the negation: r11 is positive again
+        vbif.s32        d23, d17, d31
+        vst1.32         {d20, d21}, [r6, :128]   @ store the final pair without stepping
+        vbif.s32        q2,  q1,  q15
+        vst1.32         {d22, d23}, [r5, :128]
+        vbif.s32        q3,  q0,  q15
+        vst1.32         {d4, d5}, [r8, :128]
+        vst1.32         {d6, d7}, [r7, :128]
+.endm
+
+function ff_sbc_calc_scalefactors_j_neon, export=1
+        @ parameters
+        @ r0 = in = sb_sample_f
+        @ r1 = out = scale_factor
+        @ r2 = blocks
+        @ r3 = subbands (4 takes the first path, any other value the 8 subbands path)
+        @ local variables
+        @ r4 = consts = ff_sbcdsp_joint_bits_mask
+        @ r5 = in0 (channel 0 samples)
+        @ r6 = in1 (channel 1 samples, 32 bytes after in0)
+        @ r7 = out0
+        @ r8 = out1 (32 bytes after out0)
+        @ r10 = zero
+        @ r11 = inc (64 bytes from one block to the next)
+        @ return r0 = joint (sum of the mask table entries of the selected subbands)
+
+        push            {r3-r11}               @ r3 too: the macros reuse it as counter
+        movrelx         r4,  X(ff_sbcdsp_joint_bits_mask)
+        mov             r10, #0
+        mov             r11, #64
+
+        vmov.s32        q14, #1
+        vmov.s32        q13, #16    @ 31 - SCALE_OUT_BITS
+
+        cmp             r3, #4
+        bne             8f
+
+4:      @ 4 subbands
+        add             r5,  r0,  #0
+        add             r6,  r0,  #32
+        add             r7,  r1,  #0
+        add             r8,  r1,  #32
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 0, 1, 2
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vmov.s32        d31[1], r10    @ last subband -> no joint (zeroed sum fails the vcgt)
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9   @ lane set when the joint scale factors sum is smaller
+
+        @ calculate 'joint' variable and move it to r0 (the return value)
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vpadd.s32       d16, d16, d16
+        vst1.32         {d0, d1}, [r7, :128]
+        vst1.32         {d2, d3}, [r8, :128]
+        vmov.32         r0, d16[0]
+
+        update_joint_stereo_samples
+        b               9f
+
+8:      @ 8 subbands
+        add             r5,  r0,  #16          @ upper half first: subbands 4, 5, 6, 7
+        add             r6,  r0,  #48
+        add             r7,  r1,  #16
+        add             r8,  r1,  #48
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 4, 5, 6
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vmov.s32        d31[1], r10    @ last subband -> no joint (zeroed sum fails the vcgt)
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9
+
+        @ calculate part of 'joint' variable and save it to d24
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vst1.32         {d0, d1}, [r7, :128]
+        vst1.32         {d2, d3}, [r8, :128]
+        vpadd.s32       d24, d16, d16
+
+        update_joint_stereo_samples
+
+        add             r5,  r0,  #0           @ lower half: subbands 0, 1, 2, 3
+        add             r6,  r0,  #32
+        add             r7,  r1,  #0
+        add             r8,  r1,  #32
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 0, 1, 2, 3
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9
+
+        @ combine last part of 'joint' with d24 and move it to r0 (the return value)
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vpadd.s32       d16, d16, d16
+        vst1.32         {d0, d1}, [r7, :128]
+        vadd.s32        d16, d16, d24
+        vst1.32         {d2, d3}, [r8, :128]
+        vmov.32         r0,  d16[0]            @ r0 is no longer needed as input pointer
+
+        update_joint_stereo_samples
+9:
+        pop             {r3-r11}
+        bx              lr
+endfunc
+
+function ff_sbc_enc_process_input_4s_neon, export=1
+        @ parameters
+        @ r0 = position
+        @ r1 = pcm
+        @ r2 = X
+        @ r3 = nsamples
+        @ r4 = nchannels (fifth argument, fetched from the stack below)
+        @ local variables
+        @ r5 = ff_sbc_input_perm_4
+        @ r6 = src / x
+        @ r7 = dst / y
+
+        push            {r1, r3-r7}
+        ldr             r4,  [sp, #24]         @ 6 registers pushed above = 24 bytes
+        movrelx         r5,  X(ff_sbc_input_perm_4)
+
+        @ handle X buffer wraparound
+        cmp             r0,  r3
+        bge             1f                     @ if (position < nsamples)
+        add             r7,  r2,  #576         @ &X[0][SBC_X_BUFFER_SIZE - 40]
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!    @ move 32 + 32 + 8 = 72 bytes
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0}, [r6, :64]!
+        vst1.16         {d0}, [r7, :64]!
+        cmp             r4,  #1
+        ble             2f                     @ if (nchannels > 1)
+        add             r7,  r2,  #1232        @ &X[1][SBC_X_BUFFER_SIZE - 40]
+        add             r6,  r2,  #656
+        add             r6,  r6,  r0, lsl#1    @ &X[1][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!    @ same 72 bytes for X[1]
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0}, [r6, :64]!
+        vst1.16         {d0}, [r7, :64]!
+2:
+        mov             r0,  #288              @ SBC_X_BUFFER_SIZE - 40
+1:
+
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        add             r7,  r6,  #656         @ &X[1][position]
+
+        cmp             r4,  #1
+        ble             8f                     @ if (nchannels > 1)
+        tst             r1,  #1
+        beq             7f                     @ if (pcm & 1)
+        @ poor 'pcm' alignment
+        vld1.8          {d0, d1}, [r5, :128]   @ d0, d1 = byte permutation for vtbl
+1:
+        sub             r6,  r6,  #16          @ X is filled downwards, 8 samples per pass
+        sub             r7,  r7,  #16
+        sub             r0,  r0,  #8
+        vld1.8          {d4, d5}, [r1]!        @ byte loads, then split even / odd samples
+        vuzp.16         d4,  d5
+        vld1.8          {d20, d21}, [r1]!
+        vuzp.16         d20, d21
+        vswp            d5,  d20               @ d4, d5 = even samples; d20, d21 = odd ones
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vtbl.8          d18, {d20, d21}, d0
+        vtbl.8          d19, {d20, d21}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        vst1.16         {d18, d19}, [r7, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+        b               9f
+7:
+        @ proper 'pcm' alignment
+        vld1.8          {d0, d1}, [r5, :128]   @ d0, d1 = byte permutation for vtbl
+1:
+        sub             r6,  r6,  #16
+        sub             r7,  r7,  #16
+        sub             r0,  r0,  #8
+        vld2.16         {d4, d5}, [r1]!        @ de-interleaving load: d4 even, d5 odd
+        vld2.16         {d20, d21}, [r1]!
+        vswp            d5,  d20               @ d4, d5 = even samples; d20, d21 = odd ones
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vtbl.8          d18, {d20, d21}, d0
+        vtbl.8          d19, {d20, d21}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        vst1.16         {d18, d19}, [r7, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+        b               9f
+8:
+        @ mono
+        vld1.8          {d0, d1}, [r5, :128]   @ d0, d1 = byte permutation for vtbl
+1:
+        sub             r6,  r6,  #16
+        sub             r0,  r0,  #8
+        vld1.8          {d4, d5}, [r1]!
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+9:
+        pop             {r1, r3-r7}            @ r0 is not restored: it holds the new position
+        bx              lr
+endfunc
+
+function ff_sbc_enc_process_input_8s_neon, export=1
+        @ parameters
+        @ r0 = position
+        @ r1 = pcm
+        @ r2 = X
+        @ r3 = nsamples
+        @ r4 = nchannels (fifth argument, fetched from the stack below)
+        @ local variables
+        @ r5 = ff_sbc_input_perm_8
+        @ r6 = src
+        @ r7 = dst
+
+        push            {r1, r3-r7}
+        ldr             r4,  [sp, #24]         @ 6 registers pushed above = 24 bytes
+        movrelx         r5,  X(ff_sbc_input_perm_8)
+
+        @ handle X buffer wraparound
+        cmp             r0,  r3
+        bge             1f                     @ if (position < nsamples)
+        add             r7,  r2,  #512         @ &X[0][SBC_X_BUFFER_SIZE - 72]
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!    @ move 4 * 32 + 16 = 144 bytes
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1}, [r6, :128]!
+        vst1.16         {d0, d1}, [r7, :128]!
+        cmp             r4,  #1
+        ble             2f                     @ if (nchannels > 1)
+        add             r7,  r2,  #1168        @ &X[1][SBC_X_BUFFER_SIZE - 72]
+        add             r6,  r2,  #656
+        add             r6,  r6,  r0, lsl#1    @ &X[1][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!    @ same 144 bytes for X[1]
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1}, [r6, :128]!
+        vst1.16         {d0, d1}, [r7, :128]!
+2:
+        mov             r0,  #256              @ SBC_X_BUFFER_SIZE - 72
+1:
+
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        add             r7,  r6,  #656         @ &X[1][position]
+
+        cmp             r4,  #1
+        ble             8f                     @ if (nchannels > 1)
+        tst             r1,  #1
+        beq             7f                     @ if (pcm & 1)
+        @ poor 'pcm' alignment
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]    @ d0-d3 = byte permutation for vtbl
+1:
+        sub             r6,  r6,  #32          @ X is filled downwards, 16 samples per pass
+        sub             r7,  r7,  #32
+        sub             r0,  r0,  #16
+        vld1.8          {d4, d5, d6, d7}, [r1]!    @ byte loads, then split even / odd samples
+        vuzp.16         q2,  q3
+        vld1.8          {d20, d21, d22, d23}, [r1]!
+        vuzp.16         q10, q11
+        vswp            q3,  q10               @ q2, q3 = even samples; q10, q11 = odd ones
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        vtbl.8          d16, {d20, d21, d22, d23}, d0
+        vtbl.8          d17, {d20, d21, d22, d23}, d1
+        vtbl.8          d18, {d20, d21, d22, d23}, d2
+        vtbl.8          d19, {d20, d21, d22, d23}, d3
+        vst1.16         {d16, d17, d18, d19}, [r7, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+        b 9f
+7:
+        @ proper 'pcm' alignment
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]    @ d0-d3 = byte permutation for vtbl
+1:
+        sub             r6,  r6,  #32
+        sub             r7,  r7,  #32
+        sub             r0,  r0,  #16
+        vld2.16         {d4, d5, d6, d7}, [r1]!    @ de-interleaving load: q2 even, q3 odd
+        vld2.16         {d20, d21, d22, d23}, [r1]!
+        vswp            q3,  q10               @ q2, q3 = even samples; q10, q11 = odd ones
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        vtbl.8          d16, {d20, d21, d22, d23}, d0
+        vtbl.8          d17, {d20, d21, d22, d23}, d1
+        vtbl.8          d18, {d20, d21, d22, d23}, d2
+        vtbl.8          d19, {d20, d21, d22, d23}, d3
+        vst1.16         {d16, d17, d18, d19}, [r7, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+        b               9f
+8:
+        @ mono
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]    @ d0-d3 = byte permutation for vtbl
+1:
+        sub             r6,  r6,  #32
+        sub             r0,  r0,  #16
+        vld1.8          {d4, d5, d6, d7}, [r1]!
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+9:
+        pop             {r1, r3-r7}            @ r0 is not restored: it holds the new position
+        bx              lr
+endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index c4134424f0..2d541bf64a 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -632,6 +632,8 @@ enum AVCodecID {
     AV_CODEC_ID_ATRAC3AL,
     AV_CODEC_ID_ATRAC3PAL,
     AV_CODEC_ID_DOLBY_E,
+    AV_CODEC_ID_SBC,
+    AV_CODEC_ID_MSBC,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 92bf1d2681..8d613507e0 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -2859,6 +2859,18 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_SBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sbc",
+        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    },
+    {
+        .id        = AV_CODEC_ID_MSBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "msbc",
+        .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/sbc.c b/libavcodec/sbc.c
new file mode 100644
index 0000000000..99d02cc56a
--- /dev/null
+++ b/libavcodec/sbc.c
@@ -0,0 +1,316 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common functions for the encoder and decoder
+ */
+
+#include "avcodec.h"
+#include "sbc.h"
+
+/*
+ * Table-driven CRC-8 of the first len bits in data (entry 1 = polynomial 0x1D)
+ */
+static const uint8_t crc_table[256] = {
+    0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53,
+    0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB,
+    0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
+    0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76,
+    0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4,
+    0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
+    0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19,
+    0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1,
+    0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
+    0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8,
+    0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D,
+    0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
+    0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7,
+    0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F,
+    0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
+    0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2,
+    0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75,
+    0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
+    0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8,
+    0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50,
+    0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
+    0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A,
+    0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F,
+    0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
+    0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66,
+    0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E,
+    0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
+    0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43,
+    0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1,
+    0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
+    0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C,
+    0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4
+};
+
+uint8_t ff_sbc_crc8(const uint8_t *data, size_t len)
+{
+    /* CRC-8, polynomial 0x1D, initial value 0x0F; len is counted in BITS. */
+    const size_t nbytes = len / 8, nbits = len % 8;
+    uint8_t crc = 0x0f;
+    size_t i;
+
+    for (i = 0; i < nbytes; i++)            /* whole bytes: table lookup */
+        crc = crc_table[crc ^ data[i]];
+
+    if (nbits) {
+        /* Read data[nbytes] only when a partial byte exists; with a
+         * byte-aligned len it would lie one past the CRC'd region. */
+        uint8_t octet = data[nbytes];
+        for (i = 0; i < nbits; i++, octet <<= 1)    /* MSB first */
+            crc = (uint8_t)(crc << 1) ^ (((octet ^ crc) & 0x80) ? 0x1d : 0);
+    }
+
+    return crc;
+}
+
+/* Loudness offsets, 4 subbands, [frame->frequency][subband]; A2DP specification: Appendix B, page 69 */
+static const int sbc_offset4[4][4] = {
+    { -1, 0, 0, 0 },
+    { -2, 0, 0, 1 },
+    { -2, 0, 0, 1 },
+    { -2, 0, 0, 1 }
+};
+
+/* Loudness offsets, 8 subbands, [frame->frequency][subband]; A2DP specification: Appendix B, page 69 */
+static const int sbc_offset8[4][8] = {
+    { -2, 0, 0, 0, 0, 0, 0, 1 },
+    { -3, 0, 0, 0, 0, 0, 1, 2 },
+    { -4, 0, 0, 0, 0, 0, 1, 2 },
+    { -4, 0, 0, 0, 0, 0, 1, 2 }
+};
+
+/*
+ * Code straight from the spec to calculate the bits array
+ * Takes a pointer to the frame in question (mode, allocation, bitpool, sampling
+ * frequency and scale factors are read from it), the bits array and subbands
+ */
+static av_always_inline void sbc_calculate_bits_internal(
+        const struct sbc_frame *frame, int (*bits)[8], int subbands)
+{
+    uint8_t sf = frame->frequency;    /* row index into sbc_offset4 / sbc_offset8 */
+
+    if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) {
+        /* every channel is allocated on its own against the full bitpool */
+        int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+        int ch, sb;
+
+        for (ch = 0; ch < frame->channels; ch++) {
+            max_bitneed = 0;
+            if (frame->allocation == SNR) {
+                for (sb = 0; sb < subbands; sb++) {
+                    bitneed[ch][sb] = frame->scale_factor[ch][sb];
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            } else {
+                /* every allocation value other than SNR is handled here */
+                for (sb = 0; sb < subbands; sb++) {
+                    if (frame->scale_factor[ch][sb] == 0)
+                        bitneed[ch][sb] = -5;
+                    else {
+                        if (subbands == 4)
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+                        else
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+                        if (loudness > 0)
+                            bitneed[ch][sb] = loudness / 2;
+                        else
+                            bitneed[ch][sb] = loudness;
+                    }
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+
+            /* lower bitslice until the next step would reach or exceed bitpool */
+            bitcount = 0;
+            slicecount = 0;
+            bitslice = max_bitneed + 1;
+            do {
+                bitslice--;
+                bitcount += slicecount;
+                slicecount = 0;
+                for (sb = 0; sb < subbands; sb++) {
+                    if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+                        slicecount++;
+                    else if (bitneed[ch][sb] == bitslice + 1)
+                        slicecount += 2;
+                }
+            } while (bitcount + slicecount < frame->bitpool);
+
+            if (bitcount + slicecount == frame->bitpool) {    /* exact fit: take that step too */
+                bitcount += slicecount;
+                bitslice--;
+            }
+
+            for (sb = 0; sb < subbands; sb++) {
+                if (bitneed[ch][sb] < bitslice + 2)
+                    bits[ch][sb] = 0;
+                else {
+                    bits[ch][sb] = bitneed[ch][sb] - bitslice;
+                    if (bits[ch][sb] > 16)
+                        bits[ch][sb] = 16;
+                }
+            }
+
+            /* first pass over the leftover bits, lowest subband first */
+            for (sb = 0; bitcount < frame->bitpool &&
+                            sb < subbands; sb++) {
+                if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+                    bits[ch][sb]++;
+                    bitcount++;
+                } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+                    bits[ch][sb] = 2;
+                    bitcount += 2;
+                }
+            }
+
+            /* second pass: one more bit to any subband still below 16 */
+            for (sb = 0; bitcount < frame->bitpool &&
+                            sb < subbands; sb++) {
+                if (bits[ch][sb] < 16) {
+                    bits[ch][sb]++;
+                    bitcount++;
+                }
+            }
+
+        }
+
+    } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) {
+        /* both channels are allocated together against a single bitpool */
+        int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+        int ch, sb;
+
+        max_bitneed = 0;
+        if (frame->allocation == SNR) {
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    bitneed[ch][sb] = frame->scale_factor[ch][sb];
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+        } else {
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    if (frame->scale_factor[ch][sb] == 0)
+                        bitneed[ch][sb] = -5;
+                    else {
+                        if (subbands == 4)
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+                        else
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+                        if (loudness > 0)
+                            bitneed[ch][sb] = loudness / 2;
+                        else
+                            bitneed[ch][sb] = loudness;
+                    }
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+        }
+
+        /* same bitslice search as above, counting both channels at once */
+        bitcount = 0;
+        slicecount = 0;
+        bitslice = max_bitneed + 1;
+        do {
+            bitslice--;
+            bitcount += slicecount;
+            slicecount = 0;
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+                        slicecount++;
+                    else if (bitneed[ch][sb] == bitslice + 1)
+                        slicecount += 2;
+                }
+            }
+        } while (bitcount + slicecount < frame->bitpool);
+
+        if (bitcount + slicecount == frame->bitpool) {
+            bitcount += slicecount;
+            bitslice--;
+        }
+
+        for (ch = 0; ch < 2; ch++) {
+            for (sb = 0; sb < subbands; sb++) {
+                if (bitneed[ch][sb] < bitslice + 2) {
+                    bits[ch][sb] = 0;
+                } else {
+                    bits[ch][sb] = bitneed[ch][sb] - bitslice;
+                    if (bits[ch][sb] > 16)
+                        bits[ch][sb] = 16;
+                }
+            }
+        }
+
+        /* leftover bits, visiting channel 0 then channel 1 of each subband */
+        ch = 0;
+        sb = 0;
+        while (bitcount < frame->bitpool) {
+            if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+                bits[ch][sb]++;
+                bitcount++;
+            } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+                bits[ch][sb] = 2;
+                bitcount += 2;
+            }
+            if (ch == 1) {
+                ch = 0;
+                sb++;
+                if (sb >= subbands)
+                    break;
+            } else
+                ch = 1;
+        }
+
+        /* second pass in the same order: one more bit where below 16 */
+        ch = 0;
+        sb = 0;
+        while (bitcount < frame->bitpool) {
+            if (bits[ch][sb] < 16) {
+                bits[ch][sb]++;
+                bitcount++;
+            }
+            if (ch == 1) {
+                ch = 0;
+                sb++;
+                if (sb >= subbands)
+                    break;
+            } else
+                ch = 1;
+        }
+
+    }
+
+}
+
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+{
+    if (frame->subbands != 4)   /* anything but 4 is handled as 8 subbands */
+        sbc_calculate_bits_internal(frame, bits, 8);
+    else                        /* literal counts, presumably so each inlined copy specializes */
+        sbc_calculate_bits_internal(frame, bits, 4);
+}
diff --git a/libavcodec/sbc.h b/libavcodec/sbc.h
new file mode 100644
index 0000000000..169e38f4c1
--- /dev/null
+++ b/libavcodec/sbc.h
@@ -0,0 +1,121 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2014  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common definitions for the encoder and decoder
+ */
+
+#ifndef AVCODEC_SBC_H
+#define AVCODEC_SBC_H
+
+#include "avcodec.h"
+
+#define MSBC_BLOCKS 15  /* NOTE(review): presumably the block count of an mSBC frame - confirm */
+
+/* sampling frequency (2-bit codes) */
+#define SBC_FREQ_16000  0x00
+#define SBC_FREQ_32000  0x01
+#define SBC_FREQ_44100  0x02
+#define SBC_FREQ_48000  0x03
+
+/* blocks (2-bit codes) */
+#define SBC_BLK_4       0x00
+#define SBC_BLK_8       0x01
+#define SBC_BLK_12      0x02
+#define SBC_BLK_16      0x03
+
+/* channel mode (mirrored by the mode enum in struct sbc_frame) */
+#define SBC_MODE_MONO         0x00
+#define SBC_MODE_DUAL_CHANNEL 0x01
+#define SBC_MODE_STEREO       0x02
+#define SBC_MODE_JOINT_STEREO 0x03
+
+/* allocation method (mirrored by the allocation enum in struct sbc_frame) */
+#define SBC_AM_LOUDNESS 0x00
+#define SBC_AM_SNR      0x01
+
+/* subbands (1-bit codes) */
+#define SBC_SB_4        0x00
+#define SBC_SB_8        0x01
+
+/* synchronisation words */
+#define SBC_SYNCWORD   0x9C
+#define MSBC_SYNCWORD  0xAD
+
+/* extra bits of precision for the synthesis filter input data */
+#define SBCDEC_FIXED_EXTRA_BITS 2
+
+/*
+ * Enforce 16 byte alignment for the data, which is supposed to be used
+ * with SIMD optimized code.
+ */
+#define SBC_ALIGN 16
+
+/* This structure contains an unpacked SBC frame.
+   Yes, there is probably quite some unused space herein */
+struct sbc_frame {
+    uint8_t frequency;      /* row index of the loudness offset tables in sbc.c */
+    uint8_t block_mode;     /* NOTE(review): presumably an SBC_BLK_* code - confirm */
+    uint8_t blocks;
+    enum {
+        MONO         = SBC_MODE_MONO,
+        DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL,
+        STEREO       = SBC_MODE_STEREO,
+        JOINT_STEREO = SBC_MODE_JOINT_STEREO
+    } mode;
+    uint8_t channels;       /* channel count looped over for MONO / DUAL_CHANNEL bit allocation */
+    enum {
+        LOUDNESS = SBC_AM_LOUDNESS,
+        SNR      = SBC_AM_SNR
+    } allocation;
+    uint8_t subband_mode;   /* NOTE(review): presumably an SBC_SB_* code - confirm */
+    uint8_t subbands;       /* ff_sbc_calculate_bits() treats 4 as 4 subbands, anything else as 8 */
+    uint8_t bitpool;        /* bit budget handed out by ff_sbc_calculate_bits() */
+    uint16_t codesize;
+    uint16_t length;
+
+    /* bit number x set means joint stereo has been used in subband x */
+    uint8_t joint;
+
+    /* only the lower 4 bits of every element are to be used; [channel][subband] */
+    DECLARE_ALIGNED(SBC_ALIGN, uint32_t, scale_factor)[2][8];
+
+    /* raw integer subband samples in the frame; [block][channel][subband] */
+    DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample_f)[16][2][8];
+
+    /* modified subband samples; same dimensions as sb_sample_f */
+    DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample)[16][2][8];
+
+    /* original pcm audio samples */
+    DECLARE_ALIGNED(SBC_ALIGN, int16_t, pcm_sample)[2][16*8];
+};
+
+uint8_t ff_sbc_crc8(const uint8_t *data, size_t len);    /* len is counted in bits, not bytes */
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]);    /* fills bits[channel][subband] */
+
+#endif /* AVCODEC_SBC_H */
diff --git a/libavcodec/sbcdec.c b/libavcodec/sbcdec.c
new file mode 100644
index 0000000000..f2a40ad117
--- /dev/null
+++ b/libavcodec/sbcdec.c
@@ -0,0 +1,469 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder implementation
+ */
+
+#include <stdbool.h>
+#include "avcodec.h"
+#include "internal.h"
+#include "sbc.h"
+#include "sbcdec_data.h"
+
+/* Per-channel state of the synthesis filter, kept across frames. */
+struct sbc_decoder_state {
+    /* filter memory, one row per channel; the 8-subband filter uses
+     * indexes 0..168 (160 entries plus a 9 entry mirror of the start) */
+    int32_t V[2][170];
+    /* per channel: current write position inside V for each matrix row
+     * (8 rows are used with 4 subbands, 16 rows with 8 subbands) */
+    int offset[2][16];
+};
+
+/* Private decoder context shared by the SBC and mSBC decoders. */
+typedef struct SBCDecContext {
+    AVClass *class;
+    /* most recently unpacked frame, including its decoded PCM samples */
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    /* synthesis filter state */
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_decoder_state, dsp);
+    /* frame parser selected at init time: sbc_unpack_frame for SBC,
+     * msbc_unpack_frame for mSBC */
+    int (*unpack_frame)(const uint8_t *data, struct sbc_frame *frame,
+            size_t len);
+} SBCDecContext;
+
+/*
+ * Unpacks the body of a SBC frame located at the beginning of data,
+ * which holds at most len bytes, into frame.
+ *
+ * The caller must have checked that len is at least 4 (bytes 1 to 3 are
+ * read unconditionally) and must have filled in the header derived fields
+ * of frame (mode, channels, subbands, blocks, allocation, bitpool).
+ *
+ * On success frame->joint (joint stereo only), frame->scale_factor and
+ * frame->sb_sample are filled in.
+ *
+ * Returns the length in bytes of the packed frame, or a negative
+ * value on error. The error codes returned from here are:
+ *
+ *  -1   Data stream too short
+ *  -3   CRC8 incorrect
+ */
+static int sbc_unpack_frame_internal(const uint8_t *data,
+                                     struct sbc_frame *frame, size_t len)
+{
+    unsigned int consumed;  /* number of bits parsed so far */
+    /* Will copy the parts of the header that are relevant to crc
+     * calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos = 0;        /* number of bits stored in crc_header */
+    int32_t temp;
+
+    uint32_t audio_sample;
+    int ch, sb, blk, bit;   /* channel, subband, block and bit standard
+                               counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels derived from that */
+
+    /* the 4 header bytes (sync, two info bytes, crc) come first */
+    consumed = 32;
+
+    crc_header[0] = data[1];
+    crc_header[1] = data[2];
+    crc_pos = 16;
+
+    if (frame->mode == JOINT_STEREO) {
+        if (len * 8 < consumed + frame->subbands)
+            return -1;
+
+        /* one flag per subband except the last one, MSB first */
+        frame->joint = 0x00;
+        for (sb = 0; sb < frame->subbands - 1; sb++)
+            frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb;
+        if (frame->subbands == 4)
+            crc_header[crc_pos / 8] = data[4] & 0xf0;
+        else
+            crc_header[crc_pos / 8] = data[4];
+
+        consumed += frame->subbands;
+        crc_pos += frame->subbands;
+    }
+
+    if (len * 8 < consumed + (4 * frame->subbands * frame->channels))
+        return -1;
+
+    /* 4-bit scale factors, also fed into the crc */
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            /* FIXME assert(consumed % 4 == 0); */
+            frame->scale_factor[ch][sb] =
+                (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F;
+            crc_header[crc_pos >> 3] |=
+                frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7));
+
+            consumed += 4;
+            crc_pos += 4;
+        }
+    }
+
+    if (data[3] != ff_sbc_crc8(crc_header, crc_pos))
+        return -3;
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++)
+            levels[ch][sb] = (1 << bits[ch][sb]) - 1;
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame->channels; ch++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                uint32_t shift;
+
+                /* no bits allocated to this subband: silent sample */
+                if (levels[ch][sb] == 0) {
+                    frame->sb_sample[blk][ch][sb] = 0;
+                    continue;
+                }
+
+                shift = frame->scale_factor[ch][sb] +
+                        1 + SBCDEC_FIXED_EXTRA_BITS;
+
+                audio_sample = 0;
+                for (bit = 0; bit < bits[ch][sb]; bit++) {
+                    /* consumed is the index of the next bit to read, so
+                     * it must stay strictly below the number of bits
+                     * available; otherwise data[len] would be read */
+                    if (consumed >= len * 8)
+                        return -1;
+
+                    if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01)
+                        audio_sample |= 1 << (bits[ch][sb] - bit - 1);
+
+                    consumed++;
+                }
+
+                /* dequantize: scale the level index to 'shift' bits of
+                 * magnitude and recentre it around zero */
+                frame->sb_sample[blk][ch][sb] = (int32_t)
+                    (((((uint64_t) audio_sample << 1) | 1) << shift) /
+                    levels[ch][sb]) - (1 << shift);
+            }
+        }
+    }
+
+    if (frame->mode == JOINT_STEREO) {
+        /* flagged subbands carry sum/difference: rebuild both channels */
+        for (blk = 0; blk < frame->blocks; blk++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                if (frame->joint & (0x01 << sb)) {
+                    temp = frame->sb_sample[blk][0][sb] +
+                           frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][1][sb] =
+                        frame->sb_sample[blk][0][sb] -
+                        frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][0][sb] = temp;
+                }
+            }
+        }
+    }
+
+    /* round the bit count up to a whole number of bytes */
+    if ((consumed & 0x7) != 0)
+        consumed += 8 - (consumed & 0x7);
+
+    return consumed >> 3;
+}
+
+/*
+ * Parses the header of a regular SBC frame into frame, then hands the
+ * rest of the frame over to sbc_unpack_frame_internal().
+ *
+ * Returns the value of sbc_unpack_frame_internal() or, when the header
+ * is rejected: -1 (fewer than 4 bytes), -2 (wrong sync byte) or
+ * -4 (bitpool too large for the channel mode).
+ */
+static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame,
+                            size_t len)
+{
+    uint8_t info;
+
+    if (len < 4)
+        return -1;
+    if (data[0] != SBC_SYNCWORD)
+        return -2;
+
+    /* every field but the bitpool is packed into the second byte */
+    info = data[1];
+
+    frame->frequency    = (info >> 6) & 0x03;
+    frame->block_mode   = (info >> 4) & 0x03;
+    frame->mode         = (info >> 2) & 0x03;
+    frame->allocation   = (info >> 1) & 0x01;
+    frame->subband_mode = (info & 0x01);
+    frame->bitpool      = data[2];
+
+    if (frame->block_mode == SBC_BLK_4)
+        frame->blocks = 4;
+    else if (frame->block_mode == SBC_BLK_8)
+        frame->blocks = 8;
+    else if (frame->block_mode == SBC_BLK_12)
+        frame->blocks = 12;
+    else if (frame->block_mode == SBC_BLK_16)
+        frame->blocks = 16;
+
+    if (frame->mode == MONO)
+        frame->channels = 1;
+    else if (frame->mode == DUAL_CHANNEL || frame->mode == STEREO ||
+             frame->mode == JOINT_STEREO)
+        frame->channels = 2;
+
+    if (frame->subband_mode)
+        frame->subbands = 8;
+    else
+        frame->subbands = 4;
+
+    /* the upper bound of the bitpool depends on the channel mode */
+    switch (frame->mode) {
+    case MONO:
+    case DUAL_CHANNEL:
+        if (frame->bitpool > 16 * frame->subbands)
+            return -4;
+        break;
+    case STEREO:
+    case JOINT_STEREO:
+        if (frame->bitpool > 32 * frame->subbands)
+            return -4;
+        break;
+    }
+
+    return sbc_unpack_frame_internal(data, frame, len);
+}
+
+/*
+ * Checks the fixed header of a mSBC frame, loads the fixed mSBC
+ * parameters into frame and hands the rest of the frame over to
+ * sbc_unpack_frame_internal().
+ *
+ * Returns the value of sbc_unpack_frame_internal(), -1 when fewer than
+ * 4 bytes are available, or -2 when one of the first three bytes does
+ * not have its expected value.
+ */
+static int msbc_unpack_frame(const uint8_t *data,
+                             struct sbc_frame *frame, size_t len)
+{
+    if (len < 4)
+        return -1;
+
+    /* sync byte followed by two bytes that must be zero */
+    if (data[0] != MSBC_SYNCWORD || data[1] != 0 || data[2] != 0)
+        return -2;
+
+    /* none of the parameters is read from the stream */
+    frame->mode         = MONO;
+    frame->channels     = 1;
+    frame->frequency    = SBC_FREQ_16000;
+    frame->allocation   = LOUDNESS;
+    frame->block_mode   = SBC_BLK_4;
+    frame->blocks       = MSBC_BLOCKS;
+    frame->subband_mode = 1;
+    frame->subbands     = 8;
+    frame->bitpool      = 26;
+
+    return sbc_unpack_frame_internal(data, frame, len);
+}
+
+/*
+ * Resets the synthesis filter: clears the filter memory and places the
+ * write position of row n at 10 * (n + 1) for both channels.
+ */
+static void sbc_decoder_init(struct sbc_decoder_state *state)
+{
+    const int rows = FF_ARRAY_ELEMS(state->offset[0]);
+    int row, ch;
+
+    memset(state->V, 0, sizeof(state->V));
+
+    for (row = 0; row < rows; row++) {
+        const int start = 10 * row + 10;
+
+        for (ch = 0; ch < 2; ch++)
+            state->offset[ch][row] = start;
+    }
+}
+
+/*
+ * Runs the 4 subband synthesis filter on block blk of channel ch:
+ * reads frame->sb_sample[blk][ch][0..3], updates the filter memory in
+ * state and writes 4 samples to frame->pcm_sample[ch][blk * 4 ...].
+ */
+static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
+                                       struct sbc_frame *frame, int ch, int blk)
+{
+    int32_t *v = state->V[ch];
+    int *offset = state->offset[ch];
+    const int32_t *sb = frame->sb_sample[blk][ch];
+    int i, j;
+
+    for (i = 0; i < 8; i++) {
+        int32_t acc = 0;
+
+        /* Shifting: step one slot down; on wrap-around restart at the
+         * top and mirror the first 9 entries behind it */
+        if (--offset[i] < 0) {
+            offset[i] = 79;
+            memcpy(v + 80, v, 9 * sizeof(*v));
+        }
+
+        /* Distribute the new matrix value to the shifted position */
+        for (j = 0; j < 4; j++)
+            acc += ff_synmatrix4[i][j] * sb[j];
+        v[offset[i]] = acc >> 15;
+    }
+
+    /* Compute the samples */
+    for (i = 0; i < 4; i++) {
+        const int32_t *even = v + offset[i];
+        const int32_t *odd  = v + offset[i + 4];
+        const int32_t *m0   = ff_sbc_proto_4_40m0 + 5 * i;
+        const int32_t *m1   = ff_sbc_proto_4_40m1 + 5 * i;
+        int32_t acc = 0;
+
+        for (j = 0; j < 5; j++) {
+            acc += even[2 * j]     * m0[j];
+            acc += odd[2 * j + 1]  * m1[j];
+        }
+
+        /* Store in output, Q0 */
+        frame->pcm_sample[ch][blk * 4 + i] = av_clip_int16(acc >> 15);
+    }
+}
+
+/*
+ * Runs the 8 subband synthesis filter on block blk of channel ch:
+ * reads frame->sb_sample[blk][ch][0..7], updates the filter memory in
+ * state and writes 8 samples to frame->pcm_sample[ch][blk * 8 ...].
+ */
+static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
+                                        struct sbc_frame *frame,
+                                        int ch, int blk)
+{
+    int32_t *v = state->V[ch];
+    int *offset = state->offset[ch];
+    const int32_t *sb = frame->sb_sample[blk][ch];
+    int i, j;
+
+    for (i = 0; i < 16; i++) {
+        int32_t acc = 0;
+
+        /* Shifting: step one slot down; on wrap-around restart at the
+         * top and mirror the first 9 entries behind it */
+        if (--offset[i] < 0) {
+            offset[i] = 159;
+            memcpy(v + 160, v, 9 * sizeof(*v));
+        }
+
+        /* Distribute the new matrix value to the shifted position */
+        for (j = 0; j < 8; j++)
+            acc += ff_synmatrix8[i][j] * sb[j];
+        v[offset[i]] = acc >> 15;
+    }
+
+    /* Compute the samples */
+    for (i = 0; i < 8; i++) {
+        const int32_t *even = v + offset[i];
+        const int32_t *odd  = v + offset[i + 8];
+        const int32_t *m0   = ff_sbc_proto_8_80m0 + 5 * i;
+        const int32_t *m1   = ff_sbc_proto_8_80m1 + 5 * i;
+        int32_t acc = 0;
+
+        for (j = 0; j < 5; j++) {
+            acc += even[2 * j]     * m0[j];
+            acc += odd[2 * j + 1]  * m1[j];
+        }
+
+        /* Store in output, Q0 */
+        frame->pcm_sample[ch][blk * 8 + i] = av_clip_int16(acc >> 15);
+    }
+}
+
+/*
+ * Synthesizes every block of every channel of frame into
+ * frame->pcm_sample.
+ *
+ * Returns the number of samples produced per channel, or AVERROR(EIO)
+ * when frame->subbands is neither 4 nor 8.
+ */
+static int sbc_synthesize_audio(struct sbc_decoder_state *state,
+                                struct sbc_frame *frame)
+{
+    const int subbands = frame->subbands;
+    int ch, blk;
+
+    if (subbands != 4 && subbands != 8)
+        return AVERROR(EIO);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (blk = 0; blk < frame->blocks; blk++) {
+            if (subbands == 4)
+                sbc_synthesize_four(state, frame, ch, blk);
+            else
+                sbc_synthesize_eight(state, frame, ch, blk);
+        }
+    }
+
+    return frame->blocks * subbands;
+}
+
+/* Init callback of the SBC decoder: resets the synthesis filter and
+ * selects the regular SBC frame parser. Always returns 0. */
+static int sbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *ctx = avctx->priv_data;
+
+    sbc_decoder_init(&ctx->dsp);
+    ctx->unpack_frame = sbc_unpack_frame;
+
+    return 0;
+}
+
+/* Init callback of the mSBC decoder: resets the synthesis filter and
+ * selects the mSBC frame parser. Always returns 0. */
+static int msbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *ctx = avctx->priv_data;
+
+    sbc_decoder_init(&ctx->dsp);
+    ctx->unpack_frame = msbc_unpack_frame;
+
+    return 0;
+}
+
+/*
+ * Decode callback shared by the SBC and mSBC decoders: unpacks one frame
+ * from avpkt, synthesizes it and stores the result as interleaved signed
+ * 16-bit samples in the AVFrame passed through data.
+ *
+ * Returns the number of bytes consumed from the packet, or a negative
+ * AVERROR code: AVERROR_INVALIDDATA when the frame cannot be unpacked,
+ * otherwise the error reported by the synthesis or by ff_get_buffer().
+ */
+static int sbc_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_frame_ptr,
+                            AVPacket *avpkt)
+{
+    SBCDecContext *sbc = avctx->priv_data;
+    int i, ch, samples, ret, frame_length;
+    AVFrame *frame = data;
+    int16_t *ptr;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    /* Keep the result in a signed int: sbc->frame.length is unsigned, so
+     * testing it would never see the negative error codes. */
+    frame_length = sbc->unpack_frame(avpkt->data, &sbc->frame, avpkt->size);
+    if (frame_length <= 0)
+        return AVERROR_INVALIDDATA;
+    sbc->frame.length = frame_length;
+
+    samples = sbc_synthesize_audio(&sbc->dsp, &sbc->frame);
+    if (samples < 0)
+        return samples;
+
+    /* The output buffer is sized from avctx->channels while the loop
+     * below writes frame.channels values per sample: keep both in sync
+     * with what the bitstream actually signals. */
+    avctx->channels = sbc->frame.channels;
+
+    frame->nb_samples = samples;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    ptr = (int16_t *)frame->data[0];
+
+    /* interleave the per-channel output of the synthesis filter */
+    for (i = 0; i < samples; i++)
+        for (ch = 0; ch < sbc->frame.channels; ch++)
+            *ptr++ = sbc->frame.pcm_sample[ch][i];
+
+    *got_frame_ptr = 1;
+
+    return frame_length;
+}
+
+#if CONFIG_SBC_DECODER
+/* SBC decoder: mono or stereo, interleaved signed 16-bit output; uses
+ * the regular SBC frame parser installed by sbc_decode_init(). */
+AVCodec ff_sbc_decoder = {
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_SBC,
+    .priv_data_size        = sizeof(SBCDecContext),
+    .init                  = sbc_decode_init,
+    .decode                = sbc_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+};
+#endif
+
+#if CONFIG_MSBC_DECODER
+/* mSBC decoder: mono, 16000 Hz, interleaved signed 16-bit output; shares
+ * the decode callback with the SBC decoder and differs only in the frame
+ * parser installed by msbc_decode_init(). */
+AVCodec ff_msbc_decoder = {
+    .name                  = "msbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MSBC,
+    .priv_data_size        = sizeof(SBCDecContext),
+    .init                  = msbc_decode_init,
+    .decode                = sbc_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 0 },
+};
+#endif
diff --git a/libavcodec/sbcdec_data.c b/libavcodec/sbcdec_data.c
new file mode 100644
index 0000000000..2152162207
--- /dev/null
+++ b/libavcodec/sbcdec_data.c
@@ -0,0 +1,127 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#include <stdint.h>
+#include "sbcdec_data.h"
+#include "sbc.h"
+
+/*
+ * Scaling applied to the raw table constants below: the value is
+ * reinterpreted as a signed 32-bit integer and shifted right.
+ * SS4/SS8 are used for the proto tables, SN4/SN8 for the synthesis
+ * matrices. The argument and the shift count are fully parenthesized so
+ * that the expansion does not depend on operator precedence.
+ */
+#define SS4(val)  ((int32_t)(val) >> 12)
+#define SS8(val)  ((int32_t)(val) >> 14)
+#define SN4(val)  ((int32_t)(val) >> (11 + 1 + SBCDEC_FIXED_EXTRA_BITS))
+#define SN8(val)  ((int32_t)(val) >> (11 + 1 + SBCDEC_FIXED_EXTRA_BITS))
+
+/* 20 coefficients scaled with SS4. The 4 subband synthesis filter reads
+ * them in groups of 5 per output sample and multiplies them with the
+ * even numbered taps (offset + 0, 2, 4, 6, 8) of its filter memory. */
+const int32_t ff_sbc_proto_4_40m0[] = {
+    SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8),
+    SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8),
+    SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330),
+    SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00),
+    SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7)
+};
+
+/* 20 coefficients scaled with SS4. The 4 subband synthesis filter reads
+ * them in groups of 5 per output sample and multiplies them with the
+ * odd numbered taps (offset + 1, 3, 5, 7, 9) of its filter memory. */
+const int32_t ff_sbc_proto_4_40m1[] = {
+    SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475),
+    SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370),
+    SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b),
+    SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b),
+    SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7)
+};
+
+/* 40 coefficients scaled with SS8. The 8 subband synthesis filter reads
+ * them in groups of 5 per output sample and multiplies them with the
+ * even numbered taps (offset + 0, 2, 4, 6, 8) of its filter memory. */
+const int32_t ff_sbc_proto_8_80m0[] = {
+    SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100),
+    SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0),
+    SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c),
+    SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705),
+    SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db),
+    SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948),
+    SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200),
+    SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358),
+    SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31),
+    SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170)
+};
+
+/* 40 coefficients scaled with SS8. The 8 subband synthesis filter reads
+ * them in groups of 5 per output sample and multiplies them with the
+ * odd numbered taps (offset + 1, 3, 5, 7, 9) of its filter memory. */
+const int32_t ff_sbc_proto_8_80m1[] = {
+    SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620),
+    SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40),
+    SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01),
+    SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94),
+    SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b),
+    SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68),
+    SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20),
+    SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410),
+    SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da),
+    SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a)
+};
+
+/* Matrix of the 4 subband synthesis filter, scaled with SN4: row i holds
+ * the weights applied to the 4 subband samples of a block to produce the
+ * i-th new entry of the filter memory. */
+const int32_t ff_synmatrix4[8][4] = {
+    { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) },
+    { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) },
+    { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) },
+    { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) },
+    { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) },
+    { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) },
+    { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) },
+    { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }
+};
+
+/* Matrix of the 8 subband synthesis filter, scaled with SN8: row i holds
+ * the weights applied to the 8 subband samples of a block to produce the
+ * i-th new entry of the filter memory. */
+const int32_t ff_synmatrix8[16][8] = {
+    { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798),
+      SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) },
+    { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988),
+      SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) },
+    { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac),
+      SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) },
+    { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10),
+      SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) },
+    { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000),
+      SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) },
+    { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0),
+      SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) },
+    { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54),
+      SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) },
+    { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678),
+      SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) },
+    { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868),
+      SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) },
+    { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+      SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) },
+    { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+      SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+    { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+      SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+    { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000),
+      SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) },
+    { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+      SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+    { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+      SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+    { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+      SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }
+};
diff --git a/libavcodec/sbcdec_data.h b/libavcodec/sbcdec_data.h
new file mode 100644
index 0000000000..1b79d1de23
--- /dev/null
+++ b/libavcodec/sbcdec_data.h
@@ -0,0 +1,44 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#ifndef AVCODEC_SBCDEC_DATA_H
+#define AVCODEC_SBCDEC_DATA_H
+
+#include <stdint.h>
+
+/* Coefficient tables of the 4 and 8 subband synthesis filters; the m0
+ * tables are applied to the even numbered taps of the filter memory and
+ * the m1 tables to the odd numbered ones. */
+extern const int32_t ff_sbc_proto_4_40m0[];
+extern const int32_t ff_sbc_proto_4_40m1[];
+extern const int32_t ff_sbc_proto_8_80m0[];
+extern const int32_t ff_sbc_proto_8_80m1[];
+/* Matrices turning the subband samples of one block into new entries of
+ * the filter memory: one row per entry, one column per subband. */
+extern const int32_t ff_synmatrix4[8][4];
+extern const int32_t ff_synmatrix8[16][8];
+
+#endif /* AVCODEC_SBCDEC_DATA_H */
diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
new file mode 100644
index 0000000000..0cdf5ef5aa
--- /dev/null
+++ b/libavcodec/sbcdsp.c
@@ -0,0 +1,569 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/intmath.h"
+#include "libavutil/intreadwrite.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+#include "sbcdsp_data.h"
+
+/*
+ * A reference C code of analysis filter with SIMD-friendly tables
+ * reordering and code layout. This code can be used to develop platform
+ * specific SIMD optimizations. Also it may be used as some kind of test
+ * for compiler autovectorization capabilities (who knows, if the compiler
+ * is very good at this stuff, hand optimized assembly may be not strictly
+ * needed for some platform).
+ *
+ * Note: It is also possible to make a simple variant of analysis filter,
+ * which needs only a single constants table without taking care about
+ * even/odd cases. This simple variant of filter can be implemented without
+ * input data permutation. The only thing that would be lost is the
+ * possibility to use pairwise SIMD multiplications. But for some simple
+ * CPU cores without SIMD extensions it can be useful. If anybody is
+ * interested in implementing such variant of a filter, sourcecode from
+ * bluez versions 4.26/4.27 can be used as a reference and the history of
+ * the changes in git repository done around that time may be worth checking.
+ */
+
+/*
+ * Reference C version of the 4 subband analysis filter.
+ *
+ * in     40 input samples, in the order expected by the constants table
+ * out    receives the 4 subband samples
+ * consts 40 filter coefficients followed by 16 cos transform coefficients
+ */
+static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{
+    const int16_t *cos_tab = consts + 40;
+    int32_t acc[4];
+    int16_t scaled[4];
+    int hop, n, pair;
+
+    /* rounding coefficient */
+    for (n = 0; n < 4; n++)
+        acc[n] = (int32_t) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
+
+    /* low pass polyphase filter: each accumulator takes two adjacent
+     * products out of every group of 8 */
+    for (hop = 0; hop < 40; hop += 8) {
+        for (n = 0; n < 4; n++) {
+            acc[n] += (int32_t) in[hop + 2 * n]     * consts[hop + 2 * n];
+            acc[n] += (int32_t) in[hop + 2 * n + 1] * consts[hop + 2 * n + 1];
+        }
+    }
+
+    /* scaling, then restart the accumulators for the transform */
+    for (n = 0; n < 4; n++) {
+        scaled[n] = acc[n] >> SBC_PROTO_FIXED4_SCALE;
+        acc[n]    = 0;
+    }
+
+    /* do the cos transform: 8 coefficients per pair of scaled values */
+    for (pair = 0; pair < 2; pair++) {
+        for (n = 0; n < 4; n++) {
+            acc[n] += (int32_t) scaled[2 * pair]     *
+                      cos_tab[8 * pair + 2 * n];
+            acc[n] += (int32_t) scaled[2 * pair + 1] *
+                      cos_tab[8 * pair + 2 * n + 1];
+        }
+    }
+
+    for (n = 0; n < 4; n++)
+        out[n] = acc[n] >> (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+}
+
+/*
+ * Reference C version of the 8 subband analysis filter.
+ *
+ * in     80 input samples, in the order expected by the constants table
+ * out    receives the 8 subband samples
+ * consts 80 filter coefficients followed by 64 cos transform coefficients
+ */
+static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{
+    const int16_t *cos_tab = consts + 80;
+    int32_t acc[8];
+    int16_t scaled[8];
+    int hop, n, pair;
+
+    /* rounding coefficient */
+    for (n = 0; n < 8; n++)
+        acc[n] = (int32_t) 1 << (SBC_PROTO_FIXED8_SCALE-1);
+
+    /* low pass polyphase filter: each accumulator takes two adjacent
+     * products out of every group of 16 */
+    for (hop = 0; hop < 80; hop += 16) {
+        for (n = 0; n < 8; n++) {
+            acc[n] += (int32_t) in[hop + 2 * n]     * consts[hop + 2 * n];
+            acc[n] += (int32_t) in[hop + 2 * n + 1] * consts[hop + 2 * n + 1];
+        }
+    }
+
+    /* scaling, then restart the accumulators for the transform */
+    for (n = 0; n < 8; n++) {
+        scaled[n] = acc[n] >> SBC_PROTO_FIXED8_SCALE;
+        acc[n]    = 0;
+    }
+
+    /* do the cos transform: 16 coefficients per pair of scaled values */
+    for (pair = 0; pair < 4; pair++) {
+        for (n = 0; n < 8; n++) {
+            acc[n] += (int32_t) scaled[pair * 2 + 0] *
+                      cos_tab[pair * 16 + 2 * n];
+            acc[n] += (int32_t) scaled[pair * 2 + 1] *
+                      cos_tab[pair * 16 + 2 * n + 1];
+        }
+    }
+
+    for (n = 0; n < 8; n++)
+        out[n] = acc[n] >>
+            (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
+}
+
+/*
+ * Analyze four consecutive blocks in the 4 subbands configuration.
+ *
+ * The input windows start at x + 12, x + 8, x + 4 and x + 0 (in that order)
+ * and alternate between the "odd" and "even" constant tables. Each call gets
+ * its own output pointer, "out_stride" elements after the previous one.
+ */
+static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{
+    const int16_t *const odd  = ff_sbcdsp_analysis_consts_fixed4_simd_odd;
+    const int16_t *const even = ff_sbcdsp_analysis_consts_fixed4_simd_even;
+
+    s->sbc_analyze_4(x + 12, out + 0 * out_stride, odd);
+    s->sbc_analyze_4(x +  8, out + 1 * out_stride, even);
+    s->sbc_analyze_4(x +  4, out + 2 * out_stride, odd);
+    s->sbc_analyze_4(x +  0, out + 3 * out_stride, even);
+
+    emms_c();
+}
+
+/*
+ * Analyze four consecutive blocks in the 8 subbands configuration.
+ *
+ * The input windows start at x + 24, x + 16, x + 8 and x + 0 (in that order)
+ * and alternate between the "odd" and "even" constant tables. Each call gets
+ * its own output pointer, "out_stride" elements after the previous one.
+ */
+static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{
+    const int16_t *const odd  = ff_sbcdsp_analysis_consts_fixed8_simd_odd;
+    const int16_t *const even = ff_sbcdsp_analysis_consts_fixed8_simd_even;
+
+    s->sbc_analyze_8(x + 24, out + 0 * out_stride, odd);
+    s->sbc_analyze_8(x + 16, out + 1 * out_stride, even);
+    s->sbc_analyze_8(x +  8, out + 2 * out_stride, odd);
+    s->sbc_analyze_8(x +  0, out + 3 * out_stride, even);
+
+    emms_c();
+}
+
+/* Declared early because the odd and even variants install each other. */
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride);
+
+/*
+ * Analyze a single block (8 subbands) with the "odd" constant table, then
+ * store the "even" variant in s->sbc_analyze_8s, so the next call made through
+ * that pointer uses the other table.
+ * "out_stride" is not used by this function.
+ */
+static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
+                                              int16_t *x, int32_t *out,
+                                              int out_stride)
+{
+    const int16_t *const coeffs = ff_sbcdsp_analysis_consts_fixed8_simd_odd;
+
+    s->sbc_analyze_8(x, out, coeffs);
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even;
+
+    emms_c();
+}
+
+/*
+ * Analyze a single block (8 subbands) with the "even" constant table, then
+ * store the "odd" variant in s->sbc_analyze_8s, so the next call made through
+ * that pointer uses the other table.
+ * "out_stride" is not used by this function.
+ */
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride)
+{
+    const int16_t *const coeffs = ff_sbcdsp_analysis_consts_fixed8_simd_even;
+
+    s->sbc_analyze_8(x, out, coeffs);
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
+
+    emms_c();
+}
+
+/* Read the 16-bit value at byte offset 2 * i from the "pcm" pointer that is
+ * in scope at the point of use (AV_RN16: presumably a native-endian,
+ * alignment-safe read -- confirm against libavutil). */
+#define PCM(i)  AV_RN16(pcm + 2*(i))
+
+/*
+ * Internal helper functions for input data processing. In order to get
+ * optimal performance, it is important to have "nsamples" and "nchannels"
+ * arguments used with these inline functions as compile time constants.
+ */
+
+/*
+ * Prepend new input samples to the X buffers for the 4 subbands
+ * configuration, consuming them in groups of 8 per channel.
+ *
+ * position   index in X[ch] where the previously stored data starts
+ * pcm        interleaved 16-bit input samples, read through PCM()
+ * X          per channel sample buffers, filled from top to bottom
+ * nsamples   number of samples per channel to consume
+ * nchannels  interleaving stride of "pcm"; at most two channels are stored
+ *
+ * Returns the new position (index of the most recently stored group).
+ */
+static av_always_inline int sbc_encoder_process_input_s4_internal(
+    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+    int nsamples, int nchannels)
+{
+    /* input sample index stored at x[0..7] of every group of 8 */
+    static const uint8_t order[8] = { 7, 3, 6, 4, 0, 2, 1, 5 };
+    const int stored = nchannels > 2 ? 2 : nchannels;
+    int ch, k;
+
+    /* wraparound: not enough room below "position", so move the 36 samples
+     * starting there to the top of the buffer and continue from that point */
+    if (position < nsamples) {
+        for (ch = 0; ch < stored; ch++)
+            memcpy(&X[ch][SBC_X_BUFFER_SIZE - 40], &X[ch][position],
+                   36 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 40;
+    }
+
+    /* deinterleave and permute one group of 8 samples per iteration */
+    for (; nsamples >= 8; nsamples -= 8) {
+        position -= 8;
+        for (ch = 0; ch < stored; ch++) {
+            int16_t *x = &X[ch][position];
+
+            for (k = 0; k < 8; k++)
+                x[k] = PCM(ch + order[k] * nchannels);
+        }
+        /* 8 samples * 2 bytes for each interleaved channel */
+        pcm += 16 * nchannels;
+    }
+
+    return position;
+}
+
+/*
+ * Prepend new input samples to the X buffers for the 8 subbands
+ * configuration. Parameters and return value are the same as for
+ * sbc_encoder_process_input_s4_internal().
+ *
+ * Samples are stored in groups of 16. When only half a group can be handled
+ * (position % 16 == 8 on entry, or 8 samples left at the end), the 8 samples
+ * are placed in a subset of the slots of a full group:
+ *  - the leading half uses the slots a full group gives to inputs 8..15,
+ *  - the trailing half uses the slots a full group gives to inputs 0..7.
+ * Neither subset is a contiguous range, hence the writes to x[8] and x[-7].
+ */
+static av_always_inline int sbc_encoder_process_input_s8_internal(
+    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+    int nsamples, int nchannels)
+{
+    /* input sample index stored at x[0..15] of a complete group of 16 */
+    static const uint8_t full_src[16] = {
+        15, 7, 14, 8, 13, 9, 12, 10, 11, 3, 6, 0, 5, 1, 4, 2
+    };
+    /* leading half group: destination offsets and input sample indices */
+    static const int8_t  head_dst[8] = { 0, 2, 3, 4, 5, 6, 7, 8 };
+    static const uint8_t head_src[8] = { 7, 6, 0, 5, 1, 4, 2, 3 };
+    /* trailing half group: destination offsets and input sample indices */
+    static const int8_t  tail_dst[8] = { -7, 1, 2, 3, 4, 5, 6, 7 };
+    static const uint8_t tail_src[8] = {  7, 3, 6, 0, 5, 1, 4, 2 };
+    const int stored = nchannels > 2 ? 2 : nchannels;
+    int ch, k;
+
+    /* wraparound: not enough room below "position", so move the 72 samples
+     * starting there to the top of the buffer and continue from that point */
+    if (position < nsamples) {
+        for (ch = 0; ch < stored; ch++)
+            memcpy(&X[ch][SBC_X_BUFFER_SIZE - 72], &X[ch][position],
+                   72 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 72;
+    }
+
+    /* leading half group */
+    if (position % 16 == 8) {
+        position -= 8;
+        nsamples -= 8;
+        for (ch = 0; ch < stored; ch++) {
+            int16_t *x = &X[ch][position];
+
+            for (k = 0; k < 8; k++)
+                x[head_dst[k]] = PCM(ch + head_src[k] * nchannels);
+        }
+        /* 8 samples * 2 bytes for each interleaved channel */
+        pcm += 16 * nchannels;
+    }
+
+    /* complete groups: deinterleave and permute 16 samples per iteration */
+    for (; nsamples >= 16; nsamples -= 16) {
+        position -= 16;
+        for (ch = 0; ch < stored; ch++) {
+            int16_t *x = &X[ch][position];
+
+            for (k = 0; k < 16; k++)
+                x[k] = PCM(ch + full_src[k] * nchannels);
+        }
+        pcm += 32 * nchannels;
+    }
+
+    /* trailing half group */
+    if (nsamples == 8) {
+        position -= 8;
+        for (ch = 0; ch < stored; ch++) {
+            int16_t *x = &X[ch][position];
+
+            for (k = 0; k < 8; k++)
+                x[tail_dst[k]] = PCM(ch + tail_src[k] * nchannels);
+        }
+    }
+
+    return position;
+}
+
+/*
+ * Input data processing functions. The data is endian converted if needed,
+ * channels are deinterleaved and audio samples are reordered for use in
+ * the SIMD-friendly analysis filter functions. The results are put into the
+ * "X" array, getting appended to the previous data (or, better said,
+ * prepended, as the buffer is filled from top to bottom). Old data is
+ * discarded when needed, but availability of (10 * nrof_subbands)
+ * contiguous samples is always guaranteed for the input to the analysis
+ * filter. This is achieved by copying a sufficient part of old data
+ * to the top of the buffer on buffer wraparound.
+ */
+
+/*
+ * Input processing for the 4 subbands configuration.
+ *
+ * The inlined helper is called with a literal channel count (2 when
+ * nchannels > 1, otherwise 1) so that "nchannels" is a compile time constant
+ * inside each inlined copy. Returns the new position in the X buffers.
+ */
+static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{
+    if (nchannels <= 1)
+        return sbc_encoder_process_input_s4_internal(position, pcm, X,
+                                                     nsamples, 1);
+
+    return sbc_encoder_process_input_s4_internal(position, pcm, X,
+                                                 nsamples, 2);
+}
+
+/*
+ * Input processing for the 8 subbands configuration.
+ *
+ * The inlined helper is called with a literal channel count (2 when
+ * nchannels > 1, otherwise 1) so that "nchannels" is a compile time constant
+ * inside each inlined copy. Returns the new position in the X buffers.
+ */
+static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{
+    if (nchannels <= 1)
+        return sbc_encoder_process_input_s8_internal(position, pcm, X,
+                                                     nsamples, 1);
+
+    return sbc_encoder_process_input_s8_internal(position, pcm, X,
+                                                 nsamples, 2);
+}
+
+/*
+ * Compute the scale factor of every subband of every channel.
+ *
+ * For each (channel, subband) pair, (|sample| - 1) of every non-zero sample
+ * over "blocks" blocks is OR-ed into a mask whose bit SCALE_OUT_BITS is always
+ * set. Assuming ff_clz() returns the number of leading zero bits of a 32-bit
+ * value, the stored scale factor is the position of the highest set bit of
+ * that mask relative to SCALE_OUT_BITS, so it is never negative.
+ */
+static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands)
+{
+    int ch, sb, blk;
+
+    for (ch = 0; ch < channels; ch++) {
+        for (sb = 0; sb < subbands; sb++) {
+            uint32_t mask = 1 << SCALE_OUT_BITS;
+
+            for (blk = 0; blk < blocks; blk++) {
+                int32_t mag = FFABS(sb_sample_f[blk][ch][sb]);
+
+                if (mag)
+                    mask |= mag - 1;
+            }
+            scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - ff_clz(mask);
+        }
+    }
+}
+
+/*
+ * Compute the scale factors of a two channel frame and decide, for each
+ * subband, whether to use joint stereo.
+ *
+ * The last subband always keeps its two channels as they are. For every
+ * other subband the candidate joint samples (c0 >> 1) + (c1 >> 1) and
+ * (c0 >> 1) - (c1 >> 1) are computed; when the sum of their two scale factors
+ * is strictly smaller than the sum of the plain scale factors, the joint
+ * samples and scale factors replace the originals in "sb_sample_f" and
+ * "scale_factor".
+ *
+ * Scale factors are derived as in sbc_calc_scalefactors().
+ *
+ * Returns a bitmask with bit (subbands - 1 - sb) set for each subband "sb"
+ * that uses joint stereo.
+ */
+static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands)
+{
+    const int last = subbands - 1;
+    uint32_t mask0, mask1;
+    int32_t a, b;
+    int blk, sb, joint = 0;
+
+    /* last subband does not use joint stereo */
+    mask0 = 1 << SCALE_OUT_BITS;
+    mask1 = 1 << SCALE_OUT_BITS;
+    for (blk = 0; blk < blocks; blk++) {
+        a = FFABS(sb_sample_f[blk][0][last]);
+        b = FFABS(sb_sample_f[blk][1][last]);
+        if (a)
+            mask0 |= a - 1;
+        if (b)
+            mask1 |= b - 1;
+    }
+    scale_factor[0][last] = (31 - SCALE_OUT_BITS) - ff_clz(mask0);
+    scale_factor[1][last] = (31 - SCALE_OUT_BITS) - ff_clz(mask1);
+
+    /* the rest of subbands can use joint stereo */
+    for (sb = last - 1; sb >= 0; sb--) {
+        int32_t joint_smp[16][2];
+        uint32_t sf_sum, sf_diff;
+
+        /* plain scale factors; the joint candidates are built on the way */
+        mask0 = 1 << SCALE_OUT_BITS;
+        mask1 = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            a = sb_sample_f[blk][0][sb];
+            b = sb_sample_f[blk][1][sb];
+            joint_smp[blk][0] = (a >> 1) + (b >> 1);
+            joint_smp[blk][1] = (a >> 1) - (b >> 1);
+            a = FFABS(a);
+            b = FFABS(b);
+            if (a)
+                mask0 |= a - 1;
+            if (b)
+                mask1 |= b - 1;
+        }
+        scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(mask0);
+        scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(mask1);
+
+        /* scale factors of the joint candidates */
+        mask0 = 1 << SCALE_OUT_BITS;
+        mask1 = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            a = FFABS(joint_smp[blk][0]);
+            b = FFABS(joint_smp[blk][1]);
+            if (a)
+                mask0 |= a - 1;
+            if (b)
+                mask1 |= b - 1;
+        }
+        sf_sum  = (31 - SCALE_OUT_BITS) - ff_clz(mask0);
+        sf_diff = (31 - SCALE_OUT_BITS) - ff_clz(mask1);
+
+        /* keep the plain coding unless joint stereo is strictly cheaper */
+        if (scale_factor[0][sb] + scale_factor[1][sb] <= sf_sum + sf_diff)
+            continue;
+
+        joint |= 1 << (last - sb);
+        scale_factor[0][sb] = sf_sum;
+        scale_factor[1][sb] = sf_diff;
+        for (blk = 0; blk < blocks; blk++) {
+            sb_sample_f[blk][0][sb] = joint_smp[blk][0];
+            sb_sample_f[blk][1][sb] = joint_smp[blk][1];
+        }
+    }
+
+    /* bitmask with the information about subbands using joint stereo */
+    return joint;
+}
+
+/*
+ * Fill the function pointers of "s" with the implementations defined in this
+ * file, then call ff_sbcdsp_init_arm() / ff_sbcdsp_init_x86() when ARCH_ARM /
+ * ARCH_X86 is set (presumably to install CPU specific versions).
+ *
+ * s->increment must be set by the caller beforehand: it selects the
+ * 8 subbands analysis variant (one block per call when it equals 1, four
+ * blocks per call otherwise).
+ */
+av_cold void ff_sbcdsp_init(SBCDSPContext *s)
+{
+    /* Default implementation for analyze functions */
+    s->sbc_analyze_4  = sbc_analyze_4_simd;
+    s->sbc_analyze_8  = sbc_analyze_8_simd;
+    s->sbc_analyze_4s = sbc_analyze_4b_4s_simd;
+    s->sbc_analyze_8s = s->increment == 1 ? sbc_analyze_1b_8s_simd_odd
+                                          : sbc_analyze_4b_8s_simd;
+
+    /* Default implementation for input reordering / deinterleaving */
+    s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
+    s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
+
+    /* Default implementation for scale factors calculation */
+    s->sbc_calc_scalefactors   = sbc_calc_scalefactors;
+    s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
+
+    if (ARCH_ARM)
+        ff_sbcdsp_init_arm(s);
+    if (ARCH_X86)
+        ff_sbcdsp_init_x86(s);
+}
diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
new file mode 100644
index 0000000000..334c058e6d
--- /dev/null
+++ b/libavcodec/sbcdsp.h
@@ -0,0 +1,86 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#ifndef AVCODEC_SBCDSP_H
+#define AVCODEC_SBCDSP_H
+
+#include "sbc.h"
+#include "sbcdsp_data.h"
+
+/* Subtracted from the cos table scale in the final shift of the analysis
+ * filters; also the reference bit position of the scale factor calculation */
+#define SCALE_OUT_BITS 15
+/* Number of int16_t samples in the X buffer of each channel */
+#define SBC_X_BUFFER_SIZE 328
+
+typedef struct sbc_dsp_context SBCDSPContext;
+
+/* Input buffers and function pointers used by the SBC DSP routines */
+struct sbc_dsp_context {
+    /* NOTE(review): presumably the current fill position in X, i.e. the value
+     * passed to and returned by the sbc_enc_process_input_* callbacks --
+     * confirm against the encoder */
+    int position;
+    /* Number of consecutive blocks handled by the encoder; read by
+     * ff_sbcdsp_init() to select the sbc_analyze_8s implementation */
+    uint8_t increment;
+    /* One buffer of SBC_X_BUFFER_SIZE samples per channel, with the same
+     * type as the X parameter of the sbc_enc_process_input_* callbacks */
+    DECLARE_ALIGNED(SBC_ALIGN, int16_t, X)[2][SBC_X_BUFFER_SIZE];
+    /* Analysis filters for a single block of 4 / 8 subbands: "in" is the
+     * input window, "out" receives the results and "consts" is one of the
+     * ff_sbcdsp_analysis_consts_* tables */
+    void (*sbc_analyze_4)(const int16_t *in, int32_t *out, const int16_t *consts);
+    void (*sbc_analyze_8)(const int16_t *in, int32_t *out, const int16_t *consts);
+    /* Polyphase analysis filter for 4 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_4s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Polyphase analysis filter for 8 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_8s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Process input data (deinterleave, endian conversion, reordering),
+     * depending on the number of subbands and input data byte order;
+     * both return the updated position */
+    int (*sbc_enc_process_input_4s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels);
+    int (*sbc_enc_process_input_8s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels);
+    /* Scale factors calculation */
+    void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+    /* Scale factors calculation with joint stereo support; returns the
+     * bitmask of subbands using joint stereo */
+    int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands);
+};
+
+/*
+ * Initialize pointers to the functions which are the basic "building bricks"
+ * of SBC codec. Best implementation is selected based on target CPU
+ * capabilities. s->increment is read by this function, so it has to be set
+ * before the call.
+ */
+void ff_sbcdsp_init(SBCDSPContext *s);
+
+/* Architecture specific initializers, called by ff_sbcdsp_init() when
+ * ARCH_ARM / ARCH_X86 is set */
+void ff_sbcdsp_init_arm(SBCDSPContext *s);
+void ff_sbcdsp_init_x86(SBCDSPContext *s);
+
+#endif /* AVCODEC_SBCDSP_H */
diff --git a/libavcodec/sbcdsp_data.c b/libavcodec/sbcdsp_data.c
new file mode 100644
index 0000000000..3007a23bc7
--- /dev/null
+++ b/libavcodec/sbcdsp_data.c
@@ -0,0 +1,335 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#include "sbcdsp_data.h"
+
+/*
+ * Convert a floating point coefficient to fixed point: the value is scaled,
+ * 0.5 is added for rounding and the result is truncated to int32_t.
+ *   F_PROTO4 / F_PROTO8: scale is 2 * 2^15 (16 fractional bits)
+ *   F_COS4   / F_COS8:   scale is 2^15     (15 fractional bits)
+ * The argument and the whole expansion are parenthesized so that the macros
+ * stay correct for any argument expression and in any expression context.
+ */
+#define F_PROTO4(x) ((int32_t) (((x) * 2) * \
+    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5))
+#define F_COS4(x) ((int32_t) ((x) * \
+    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5))
+#define F_PROTO8(x) ((int32_t) (((x) * 2) * \
+    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5))
+#define F_COS8(x) ((int32_t) ((x) * \
+    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5))
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+/*
+ * 4 subbands, "even" variant: 40 "proto" entries (F_PROTO4) followed by
+ * 16 "cos" entries (F_COS4). Every "proto" entry is multiplied by one of
+ * C0..C3 and every "cos" entry is divided by one of the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_even)[40 + 16] = {
+#define C0 1.0932568993
+#define C1 1.3056875580
+#define C2 1.3056875580
+#define C3 1.6772280856
+
+#define F(x) F_PROTO4(x)
+     F(0.00000000E+00 * C0),  F(3.83720193E-03 * C0),
+     F(5.36548976E-04 * C1),  F(2.73370904E-03 * C1),
+     F(3.06012286E-03 * C2),  F(3.89205149E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3),
+     F(1.09137620E-02 * C0),  F(2.58767811E-02 * C0),
+     F(2.04385087E-02 * C1),  F(3.21939290E-02 * C1),
+     F(7.76463494E-02 * C2),  F(6.13245186E-03 * C2),
+     F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3),
+     F(1.35593274E-01 * C0),  F(2.94315332E-01 * C0),
+     F(1.94987841E-01 * C1),  F(2.81828203E-01 * C1),
+    -F(1.94987841E-01 * C2),  F(2.81828203E-01 * C2),
+     F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3),
+    -F(1.35593274E-01 * C0),  F(2.58767811E-02 * C0),
+    -F(7.76463494E-02 * C1),  F(6.13245186E-03 * C1),
+    -F(2.04385087E-02 * C2),  F(3.21939290E-02 * C2),
+     F(0.00000000E+00 * C3),  F(2.88217274E-02 * C3),
+    -F(1.09137620E-02 * C0),  F(3.83720193E-03 * C0),
+    -F(3.06012286E-03 * C1),  F(3.89205149E-03 * C1),
+    -F(5.36548976E-04 * C2),  F(2.73370904E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3),
+#undef F
+#define F(x) F_COS4(x)
+     F(0.7071067812 / C0),  F(0.9238795325 / C1),
+    -F(0.7071067812 / C0),  F(0.3826834324 / C1),
+    -F(0.7071067812 / C0), -F(0.3826834324 / C1),
+     F(0.7071067812 / C0), -F(0.9238795325 / C1),
+     F(0.3826834324 / C2), -F(1.0000000000 / C3),
+    -F(0.9238795325 / C2), -F(1.0000000000 / C3),
+     F(0.9238795325 / C2), -F(1.0000000000 / C3),
+    -F(0.3826834324 / C2), -F(1.0000000000 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+/*
+ * 4 subbands, "odd" variant: 40 "proto" entries (F_PROTO4) followed by
+ * 16 "cos" entries (F_COS4). Every "proto" entry is multiplied by one of
+ * C0..C3 and every "cos" entry is divided by one of the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_odd)[40 + 16] = {
+#define C0 1.3056875580
+#define C1 1.6772280856
+#define C2 1.0932568993
+#define C3 1.3056875580
+
+#define F(x) F_PROTO4(x)
+     F(2.73370904E-03 * C0),  F(5.36548976E-04 * C0),
+    -F(1.49188357E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(1.09137620E-02 * C2),
+     F(3.89205149E-03 * C3),  F(3.06012286E-03 * C3),
+     F(3.21939290E-02 * C0),  F(2.04385087E-02 * C0),
+    -F(2.88757392E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2),  F(1.35593274E-01 * C2),
+     F(6.13245186E-03 * C3),  F(7.76463494E-02 * C3),
+     F(2.81828203E-01 * C0),  F(1.94987841E-01 * C0),
+    -F(2.46636662E-01 * C1),  F(0.00000000E+00 * C1),
+     F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2),
+     F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3),
+     F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0),
+     F(2.88217274E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2),
+     F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3),
+     F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0),
+    -F(1.86581691E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(0.00000000E+00 * C2),
+     F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3),
+#undef F
+#define F(x) F_COS4(x)
+     F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.7071067812 / C2),  F(0.3826834324 / C3),
+    -F(0.7071067812 / C2), -F(0.9238795325 / C3),
+    -F(0.7071067812 / C2),  F(0.9238795325 / C3),
+     F(0.7071067812 / C2), -F(0.3826834324 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+/*
+ * 8 subbands, "even" variant: 80 "proto" entries (F_PROTO8) followed by
+ * 64 "cos" entries (F_COS8). Every "proto" entry is multiplied by one of
+ * C0..C7 and every "cos" entry is divided by one of the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_even)[80 + 64] = {
+#define C0 2.7906148894
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.5377944043
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO8(x)
+     F(0.00000000E+00 * C0),  F(2.01182542E-03 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+    -F(8.23919506E-04 * C4),  F(0.00000000E+00 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(5.65949473E-03 * C0),  F(1.29371806E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+    -F(1.46525263E-02 * C4),  F(0.00000000E+00 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(6.79989431E-02 * C0),  F(1.46955068E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+    -F(1.23264548E-01 * C4),  F(0.00000000E+00 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+    -F(6.79989431E-02 * C0),  F(1.29371806E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.46404076E-02 * C4),  F(0.00000000E+00 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+    -F(5.65949473E-03 * C0),  F(2.01182542E-03 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+    -F(9.02154502E-04 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS8(x)
+     F(0.7071067812 / C0),  F(0.8314696123 / C1),
+    -F(0.7071067812 / C0), -F(0.1950903220 / C1),
+    -F(0.7071067812 / C0), -F(0.9807852804 / C1),
+     F(0.7071067812 / C0), -F(0.5555702330 / C1),
+     F(0.7071067812 / C0),  F(0.5555702330 / C1),
+    -F(0.7071067812 / C0),  F(0.9807852804 / C1),
+    -F(0.7071067812 / C0),  F(0.1950903220 / C1),
+     F(0.7071067812 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+    -F(1.0000000000 / C4),  F(0.5555702330 / C5),
+    -F(1.0000000000 / C4), -F(0.9807852804 / C5),
+    -F(1.0000000000 / C4),  F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.9807852804 / C5),
+    -F(1.0000000000 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
+
+/*
+ * 8 subbands, "odd" variant: 80 "proto" entries (F_PROTO8) followed by
+ * 64 "cos" entries (F_COS8). Every "proto" entry is multiplied by one of
+ * C0..C7 and every "cos" entry is divided by one of the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_odd)[80 + 64] = {
+#define C0 2.5377944043
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.7906148894
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO8(x)
+     F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+     F(2.01182542E-03 * C4),  F(5.65949473E-03 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+     F(1.29371806E-02 * C4),  F(6.79989431E-02 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+     F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+     F(0.00000000E+00 * C0),  F(1.46404076E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+     F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+     F(2.01182542E-03 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS8(x)
+    -F(1.0000000000 / C0),  F(0.8314696123 / C1),
+    -F(1.0000000000 / C0), -F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.9807852804 / C1),
+    -F(1.0000000000 / C0), -F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.9807852804 / C1),
+    -F(1.0000000000 / C0),  F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+     F(0.7071067812 / C4),  F(0.5555702330 / C5),
+    -F(0.7071067812 / C4), -F(0.9807852804 / C5),
+    -F(0.7071067812 / C4),  F(0.1950903220 / C5),
+     F(0.7071067812 / C4),  F(0.8314696123 / C5),
+     F(0.7071067812 / C4), -F(0.8314696123 / C5),
+    -F(0.7071067812 / C4), -F(0.1950903220 / C5),
+    -F(0.7071067812 / C4),  F(0.9807852804 / C5),
+     F(0.7071067812 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
diff --git a/libavcodec/sbcdsp_data.h b/libavcodec/sbcdsp_data.h
new file mode 100644
index 0000000000..12839fb3c3
--- /dev/null
+++ b/libavcodec/sbcdsp_data.h
@@ -0,0 +1,57 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#ifndef AVCODEC_SBCDSP_DATA_H
+#define AVCODEC_SBCDSP_DATA_H
+
+#include "sbc.h"
+
+#define SBC_PROTO_FIXED4_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1)
+#define SBC_COS_TABLE_FIXED4_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    )
+#define SBC_PROTO_FIXED8_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1)
+#define SBC_COS_TABLE_FIXED8_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    )
+
+/*
+ * Constant tables for use in the SIMD-optimized analysis filters.
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_odd[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_odd[];
+
+#endif /* AVCODEC_SBCDSP_DATA_H */
diff --git a/libavcodec/sbcenc.c b/libavcodec/sbcenc.c
new file mode 100644
index 0000000000..94a0331495
--- /dev/null
+++ b/libavcodec/sbcenc.c
@@ -0,0 +1,461 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC encoder implementation
+ */
+
+#include <stdbool.h>
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+
+typedef struct SBCEncContext { /* private context shared by the SBC and mSBC encoders */
+    AVClass *class;
+
+    uint8_t frequency;  /* SBC_FREQ_* code, or index into the supported sample rates */
+    int blocks;         /* "blocks" option; rewritten by the init callbacks */
+    int subbands;       /* "subbands" option; after init: 0 -> 4, non-zero -> 8 */
+    uint8_t mode;       /* SBC_MODE_* channel mode */
+    int allocation;     /* "snr" option */
+    int bitpool;        /* "bitpool" option */
+
+    int joint_stereo;   /* "joint_stereo" option */
+    int dual_channel;   /* "dual_channel" option */
+
+    bool init;          /* set once the first frame has set up frame and dsp */
+    bool msbc;          /* set by the mSBC init callback */
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    DECLARE_ALIGNED(SBC_ALIGN, SBCDSPContext, dsp);
+    size_t (*pack_frame)(AVPacket *avpkt, struct sbc_frame *frame, int joint); /* sbc_pack_frame or msbc_pack_frame */
+} SBCEncContext;
+
+/*
+ * Run the 4- or 8-subband analysis routine over every channel, stepping
+ * through the blocks s->increment at a time and moving the input pointer
+ * down by subbands * s->increment samples after each call.
+ *
+ * Each call receives frame->sb_sample_f[blk][ch] (presumably its output)
+ * and the distance to the same row of the next block.
+ *
+ * Returns frame->blocks * frame->subbands, or AVERROR(EIO) when
+ * frame->subbands is neither 4 nor 8.
+ */
+static int sbc_analyze_audio(SBCDSPContext *s, struct sbc_frame *frame)
+{
+    const int nsb = frame->subbands;
+    int16_t *in;
+    int c, b;
+
+    if (nsb != 4 && nsb != 8)
+        return AVERROR(EIO);
+
+    for (c = 0; c < frame->channels; c++) {
+        in = &s->X[c][s->position - nsb * s->increment + frame->blocks * nsb];
+
+        for (b = 0; b < frame->blocks; b += s->increment) {
+            /* last argument: distance between the outputs of two blocks */
+            if (nsb == 4)
+                s->sbc_analyze_4s(s, in,
+                                  frame->sb_sample_f[b][c],
+                                  frame->sb_sample_f[b + 1][c] -
+                                  frame->sb_sample_f[b][c]);
+            else
+                s->sbc_analyze_8s(s, in,
+                                  frame->sb_sample_f[b][c],
+                                  frame->sb_sample_f[b + 1][c] -
+                                  frame->sb_sample_f[b][c]);
+
+            in -= nsb * s->increment;
+        }
+    }
+
+    return frame->blocks * nsb;
+}
+
+/*
+ * Packs the SBC frame from frame into the memory in avpkt (bytes 1 and 2 of
+ * the header must already be set; byte 3 receives the CRC-8).
+ * Returns the number of bytes written after the 4 header bytes.
+ */
+static av_always_inline size_t sbc_pack_frame_internal(AVPacket *avpkt,
+                    struct sbc_frame *frame, int frame_subbands,
+                    int frame_channels, int joint)
+{
+    PutBitContext pb;
+
+    /* Will copy the header parts for CRC-8 calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos = 0;
+
+    uint32_t audio_sample;
+
+    int ch, sb, blk;        /* channel, subband and block counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels are derived from that */
+    uint32_t sb_sample_delta[2][8];
+
+    /* Can't fill in crc yet */
+    crc_header[0] = avpkt->data[1];
+    crc_header[1] = avpkt->data[2];
+    crc_pos = 16;
+
+    /* the writer starts 4 bytes into the packet, so it owns 4 bytes less */
+    init_put_bits(&pb, avpkt->data + 4, avpkt->size - 4);
+
+    if (frame->mode == JOINT_STEREO) {
+        put_bits(&pb, frame_subbands, joint);
+        crc_header[crc_pos >> 3] = joint;
+        crc_pos += frame_subbands;
+    }
+
+    for (ch = 0; ch < frame_channels; ch++) {
+        for (sb = 0; sb < frame_subbands; sb++) {
+            put_bits(&pb, 4, frame->scale_factor[ch][sb] & 0x0F);
+            crc_header[crc_pos >> 3] <<= 4;
+            crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
+            crc_pos += 4;
+        }
+    }
+
+    /* align the last crc byte */
+    if (crc_pos % 8)
+        crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
+
+    avpkt->data[3] = ff_sbc_crc8(crc_header, crc_pos);
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame_channels; ch++) {
+        for (sb = 0; sb < frame_subbands; sb++) {
+            levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
+                (32 - (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 2));
+            sb_sample_delta[ch][sb] = (uint32_t) 1 <<
+                (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 1);
+        }
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame_channels; ch++) {
+            for (sb = 0; sb < frame_subbands; sb++) {
+                if (bits[ch][sb] == 0)
+                    continue;
+
+                audio_sample = ((uint64_t) levels[ch][sb] *
+                    (sb_sample_delta[ch][sb] +
+                    frame->sb_sample_f[blk][ch][sb])) >> 32;
+
+                put_bits(&pb, bits[ch][sb], audio_sample);
+            }
+        }
+    }
+
+    flush_put_bits(&pb);
+
+    return (put_bits_count(&pb) + 7) / 8;
+}
+
+/*
+ * Write the first three header bytes of a regular SBC frame, then pack it.
+ * Byte 3 (CRC) and the payload are filled in by sbc_pack_frame_internal().
+ * NOTE: the -5 returned for an out-of-range bitpool is converted to size_t.
+ */
+static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame, int joint)
+{
+    uint8_t *hdr = avpkt->data;
+    const int nsb = frame->subbands == 4 ? 4 : 8;
+    const int one_ch_mode = frame->mode == MONO || frame->mode == DUAL_CHANNEL;
+    const int two_ch_mode = frame->mode == STEREO || frame->mode == JOINT_STEREO;
+
+    hdr[0] = SBC_SYNCWORD;
+    hdr[1] = ((frame->frequency  & 0x03) << 6) |
+             ((frame->block_mode & 0x03) << 4) |
+             ((frame->mode       & 0x03) << 2) |
+             ((frame->allocation & 0x01) << 1);
+    hdr[2] = frame->bitpool;
+
+    /* bitpool limit: 16 * subbands, doubled for stereo / joint stereo */
+    if (one_ch_mode && frame->bitpool > nsb << 4)
+        return -5;
+    if (two_ch_mode && frame->bitpool > nsb << 5)
+        return -5;
+
+    /* literal arguments, presumably so the always-inline packer sees constants */
+    if (nsb == 4)
+        return frame->channels == 1
+             ? sbc_pack_frame_internal(avpkt, frame, 4, 1, joint)
+             : sbc_pack_frame_internal(avpkt, frame, 4, 2, joint);
+
+    /* the lowest bit of header byte 1 is set for 8 subbands */
+    hdr[1] |= 0x01;
+    return frame->channels == 1
+         ? sbc_pack_frame_internal(avpkt, frame, 8, 1, joint)
+         : sbc_pack_frame_internal(avpkt, frame, 8, 2, joint);
+}
+
+/* mSBC packer: fixed header bytes, then always 8 subbands and 1 channel. */
+static size_t msbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame, int joint)
+{
+    uint8_t *hdr = avpkt->data;
+    hdr[0] = MSBC_SYNCWORD;
+    hdr[1] = hdr[2] = 0;
+    return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
+}
+
+/*
+ * Reset the DSP state: clear the X buffers, pick the start position and the
+ * analysis increment (1 for mSBC, 4 otherwise), then call ff_sbcdsp_init().
+ */
+static void sbc_encoder_init(bool msbc, SBCDSPContext *s,
+                             const struct sbc_frame *frame)
+{
+    memset(&s->X, 0, sizeof(s->X));
+    s->position  = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
+    s->increment = msbc ? 1 : 4;
+    ff_sbcdsp_init(s);
+}
+
+/* SBC init callback: converts the user options into frame header codes. */
+static int sbc_encode_init(AVCodecContext *avctx)
+{
+    SBCEncContext *ctx = avctx->priv_data;
+    int idx;
+
+    if (ctx->joint_stereo && ctx->dual_channel) {
+        av_log(avctx, AV_LOG_ERROR, "joint_stereo and dual_channel "
+                                    "can't be used at the same time.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ctx->pack_frame = sbc_pack_frame;
+    ctx->subbands >>= 3;
+    ctx->blocks     = (ctx->blocks >> 2) - 1;
+    if (!avctx->frame_size)
+        avctx->frame_size = 4*(ctx->subbands + 1) * 4*(ctx->blocks + 1);
+
+    /* keep the 44100 Hz code unless the rate is found in the codec's list */
+    ctx->frequency = SBC_FREQ_44100;
+    for (idx = 0; avctx->codec->supported_samplerates[idx]; idx++)
+        if (avctx->codec->supported_samplerates[idx] == avctx->sample_rate)
+            ctx->frequency = idx;
+
+    /* a single channel forces mono whatever the stereo options say */
+    if (avctx->channels == 1)
+        ctx->mode = SBC_MODE_MONO;
+    else
+        ctx->mode = ctx->joint_stereo ? SBC_MODE_JOINT_STEREO :
+                    ctx->dual_channel ? SBC_MODE_DUAL_CHANNEL : SBC_MODE_STEREO;
+
+    return 0;
+}
+
+/* mSBC init callback: every coding parameter is set to a fixed value. */
+static int msbc_encode_init(AVCodecContext *avctx)
+{
+    SBCEncContext *ctx = avctx->priv_data;
+
+    ctx->msbc       = true;
+    ctx->pack_frame = msbc_pack_frame;
+    ctx->mode       = SBC_MODE_MONO;
+    ctx->frequency  = SBC_FREQ_16000;
+    ctx->subbands   = SBC_SB_8;
+    ctx->blocks     = MSBC_BLOCKS;
+    ctx->allocation = SBC_AM_LOUDNESS;
+    ctx->bitpool    = 26;
+
+    if (avctx->frame_size == 0)
+        avctx->frame_size = 8 * MSBC_BLOCKS;
+
+    return 0;
+}
+
+/*
+ * Returns the output block size in bytes for the current settings:
+ * 4 header bytes + 4 bits per scale factor + the sample bits, rounded up.
+ * The cached frame.length is returned while the bitpool is unchanged.
+ */
+static size_t sbc_get_frame_length(SBCEncContext *sbc)
+{
+    uint8_t nsb, nch, nblk, pool;
+    int sample_bits;
+
+    if (sbc->init && sbc->frame.bitpool == sbc->bitpool)
+        return sbc->frame.length;
+
+    nsb  = sbc->subbands ? 8 : 4;
+    nch  = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+    nblk = sbc->msbc ? MSBC_BLOCKS : 4 + (sbc->blocks * 4);
+    pool = sbc->bitpool;
+
+    if (nch == 1 || sbc->mode == SBC_MODE_DUAL_CHANNEL)
+        sample_bits = nblk * nch * pool;
+    else if (sbc->mode == SBC_MODE_JOINT_STEREO)
+        sample_bits = nsb + nblk * pool;
+    else
+        sample_bits = nblk * pool;
+    /* the bit count is not always a multiple of 8, so it is rounded up */
+    return 4 + (4 * nsb * nch) / 8 + (sample_bits + 7) / 8;
+}
+
+/*
+ * encode2 callback shared by the SBC and mSBC encoders. The first call
+ * fills sbc->frame from the context and sets up the DSP state; later calls
+ * only refresh the frame length when the bitpool has changed.
+ * Returns 0 without producing a packet if the input frame is too short.
+ */
+static int sbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    struct sbc_frame *f;
+    SBCDSPContext *dsp;
+    int (*process_input)(int position,
+            const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+            int nsamples, int nchannels);
+    int joint = 0;
+    int ret;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    f   = &sbc->frame;
+    dsp = &sbc->dsp;
+
+    if (!sbc->init) {
+        f->frequency    = sbc->frequency;
+        f->mode         = sbc->mode;
+        f->channels     = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+        f->allocation   = sbc->allocation;
+        f->subband_mode = sbc->subbands;
+        f->subbands     = sbc->subbands ? 8 : 4;
+        f->block_mode   = sbc->blocks;
+        f->blocks       = sbc->msbc ? MSBC_BLOCKS : 4 + (sbc->blocks * 4);
+        f->bitpool      = sbc->bitpool;
+        f->codesize     = f->subbands * f->blocks * f->channels * 2;
+        f->length       = sbc_get_frame_length(sbc);
+
+        sbc_encoder_init(sbc->msbc, dsp, f);
+        sbc->init = true;
+    } else if (f->bitpool != sbc->bitpool) {
+        f->length  = sbc_get_frame_length(sbc);
+        f->bitpool = sbc->bitpool;
+    }
+
+    /* input must be large enough to encode a complete frame */
+    if (frame->nb_samples * f->channels * 2 < f->codesize)
+        return 0;
+
+    ret = ff_alloc_packet2(avctx, avpkt, f->length, 0);
+    if (ret < 0)
+        return ret;
+
+    /* pick the input routine for this subband count; it returns the new position */
+    process_input = f->subbands == 8 ? dsp->sbc_enc_process_input_8s
+                                     : dsp->sbc_enc_process_input_4s;
+    dsp->position = process_input(dsp->position, frame->data[0], dsp->X,
+                                  f->subbands * f->blocks, f->channels);
+
+    sbc_analyze_audio(dsp, f);
+
+    /* in joint stereo the scale factor routine's result is handed to the packer */
+    if (f->mode == JOINT_STEREO)
+        joint = dsp->sbc_calc_scalefactors_j(f->sb_sample_f, f->scale_factor,
+                                             f->blocks, f->subbands);
+    else
+        dsp->sbc_calc_scalefactors(f->sb_sample_f, f->scale_factor,
+                                   f->blocks, f->channels, f->subbands);
+
+    sbc->pack_frame(avpkt, f, joint);
+
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+#define OFFSET(x) offsetof(SBCEncContext, x) /* byte offset of an option field in the private context */
+#define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM /* flags shared by every option below */
+static const AVOption options[] = { /* entries: name, help, offset, type, default, min, max, flags */
+    { "joint_stereo", "use joint stereo",
+      OFFSET(joint_stereo), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "dual_channel", "use dual channel",
+      OFFSET(dual_channel), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "subbands",     "number of subbands (4 or 8)",
+      OFFSET(subbands),     AV_OPT_TYPE_INT,  { .i64 =  8 }, 4,   8, AE }, /* NOTE(review): the 4..8 range also admits 5, 6 and 7 */
+    { "bitpool",      "bitpool value",
+      OFFSET(bitpool),      AV_OPT_TYPE_INT,  { .i64 = 32 }, 0, 255, AE },
+    { "blocks",       "number of blocks (4, 8, 12 or 16)",
+      OFFSET(blocks),       AV_OPT_TYPE_INT,  { .i64 = 16 }, 4,  16, AE }, /* NOTE(review): the 4..16 range also admits non-multiples of 4 */
+    { "snr",          "use SNR mode (instead of loudness)",
+      OFFSET(allocation),   AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE }, /* stored in the allocation field */
+    { NULL },
+};
+
+static const AVClass sbc_class = { /* AVClass tying the options[] table to the SBC encoder */
+    .class_name = "sbc encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+#if CONFIG_SBC_ENCODER
+AVCodec ff_sbc_encoder = { /* regular SBC encoder: mono or stereo, S16 input */
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_SBC,
+    .priv_data_size        = sizeof(SBCEncContext),
+    .init                  = sbc_encode_init,
+    .encode2               = sbc_encode_frame,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 }, /* sbc_encode_init stores the matching index as the frequency code */
+    .priv_class            = &sbc_class,
+};
+#endif /* CONFIG_SBC_ENCODER */
+
+#if CONFIG_MSBC_ENCODER
+AVCodec ff_msbc_encoder = { /* mSBC encoder: mono, 16000 Hz, S16 input; no priv_class is set here */
+    .name                  = "msbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MSBC,
+    .priv_data_size        = sizeof(SBCEncContext),
+    .init                  = msbc_encode_init,
+    .encode2               = sbc_encode_frame, /* same encode callback as the SBC encoder */
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 0 },
+};
+#endif /* CONFIG_MSBC_ENCODER */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a805cd37b4..2350c8bbee 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += x86/pngdsp_init.o
 OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
+OBJS-$(CONFIG_SBC_ENCODER)             += x86/sbcdsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
 OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
@@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER)      += x86/pngdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_RV40_DECODER)     += x86/rv40dsp.o
+X86ASM-OBJS-$(CONFIG_SBC_ENCODER)      += x86/sbcdsp.o
 X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER)     += x86/svq1enc.o
 X86ASM-OBJS-$(CONFIG_TAK_DECODER)      += x86/takdsp.o
 X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
new file mode 100644
index 0000000000..ecf5298490
--- /dev/null
+++ b/libavcodec/x86/sbcdsp.asm
@@ -0,0 +1,290 @@
+;******************************************************************************
+;* SIMD optimized SBC encoder DSP functions
+;*
+;* Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+;* Copyright (C) 2008-2010  Nokia Corporation
+;* Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+;* Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+;* Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+scale_mask: times 2 dd 0x8000    ; 1 << (SBC_PROTO_FIXED4_SCALE - 1)
+
+SECTION .text
+
+;*******************************************************************
+;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
+    movq          m0, [inq]             ; in[0..3]
+    movq          m1, [inq+8]           ; in[4..7]
+    pmaddwd       m0, [constsq]         ; 16-bit multiply, adjacent products summed into 2 dwords
+    pmaddwd       m1, [constsq+8]
+    paddd         m0, [scale_mask]      ; add the rounding term before the >> 16 below
+    paddd         m1, [scale_mask]
+
+    movq          m2, [inq+16]          ; in[8..15]: multiply and accumulate into m0/m1
+    movq          m3, [inq+24]
+    pmaddwd       m2, [constsq+16]
+    pmaddwd       m3, [constsq+24]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    movq          m2, [inq+32]          ; in[16..23]
+    movq          m3, [inq+40]
+    pmaddwd       m2, [constsq+32]
+    pmaddwd       m3, [constsq+40]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    movq          m2, [inq+48]          ; in[24..31]
+    movq          m3, [inq+56]
+    pmaddwd       m2, [constsq+48]
+    pmaddwd       m3, [constsq+56]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    movq          m2, [inq+64]          ; in[32..39]
+    movq          m3, [inq+72]
+    pmaddwd       m2, [constsq+64]
+    pmaddwd       m3, [constsq+72]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    psrad         m0, 16    ; SBC_PROTO_FIXED4_SCALE
+    psrad         m1, 16    ; SBC_PROTO_FIXED4_SCALE
+    packssdw      m0, m0    ; saturate to int16; the word pair ends up in both halves
+    packssdw      m1, m1
+
+    movq          m2, m0    ; second stage: multiply by the constants at byte offset 80 and up
+    pmaddwd       m0, [constsq+80]
+    pmaddwd       m2, [constsq+88]
+
+    movq          m3, m1
+    pmaddwd       m1, [constsq+96]
+    pmaddwd       m3, [constsq+104]
+    paddd         m0, m1
+    paddd         m2, m3
+
+    movq          [outq  ], m0          ; out[0..1]
+    movq          [outq+8], m2          ; out[2..3]
+
+    RET
+
+
+
+;*******************************************************************
+;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_analyze_8, 3, 3, 4, in, out, consts ; NOTE(review): m4-m7 are used below although 4 vector registers are declared - confirm
+    movq          m0, [inq]             ; in[0..15] in four registers
+    movq          m1, [inq+8]
+    movq          m2, [inq+16]
+    movq          m3, [inq+24]
+    pmaddwd       m0, [constsq]         ; 16-bit multiply, adjacent products summed into 2 dwords
+    pmaddwd       m1, [constsq+8]
+    pmaddwd       m2, [constsq+16]
+    pmaddwd       m3, [constsq+24]
+    paddd         m0, [scale_mask]      ; add the rounding term before the >> 16 below
+    paddd         m1, [scale_mask]
+    paddd         m2, [scale_mask]
+    paddd         m3, [scale_mask]
+
+    movq          m4, [inq+32]          ; in[16..31]: multiply and accumulate into m0..m3
+    movq          m5, [inq+40]
+    movq          m6, [inq+48]
+    movq          m7, [inq+56]
+    pmaddwd       m4, [constsq+32]
+    pmaddwd       m5, [constsq+40]
+    pmaddwd       m6, [constsq+48]
+    pmaddwd       m7, [constsq+56]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    movq          m4, [inq+64]          ; in[32..47]
+    movq          m5, [inq+72]
+    movq          m6, [inq+80]
+    movq          m7, [inq+88]
+    pmaddwd       m4, [constsq+64]
+    pmaddwd       m5, [constsq+72]
+    pmaddwd       m6, [constsq+80]
+    pmaddwd       m7, [constsq+88]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    movq          m4, [inq+96]          ; in[48..63]
+    movq          m5, [inq+104]
+    movq          m6, [inq+112]
+    movq          m7, [inq+120]
+    pmaddwd       m4, [constsq+96]
+    pmaddwd       m5, [constsq+104]
+    pmaddwd       m6, [constsq+112]
+    pmaddwd       m7, [constsq+120]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    movq          m4, [inq+128]         ; in[64..79]
+    movq          m5, [inq+136]
+    movq          m6, [inq+144]
+    movq          m7, [inq+152]
+    pmaddwd       m4, [constsq+128]
+    pmaddwd       m5, [constsq+136]
+    pmaddwd       m6, [constsq+144]
+    pmaddwd       m7, [constsq+152]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    psrad         m0, 16    ; SBC_PROTO_FIXED8_SCALE
+    psrad         m1, 16    ; SBC_PROTO_FIXED8_SCALE
+    psrad         m2, 16    ; SBC_PROTO_FIXED8_SCALE
+    psrad         m3, 16    ; SBC_PROTO_FIXED8_SCALE
+
+    packssdw      m0, m0    ; saturate to int16; the word pair ends up in both halves
+    packssdw      m1, m1
+    packssdw      m2, m2
+    packssdw      m3, m3
+
+    movq          m4, m0    ; second stage, first half: m4/m5 accumulate out[0..3]
+    movq          m5, m0
+    pmaddwd       m4, [constsq+160]
+    pmaddwd       m5, [constsq+168]
+
+    movq          m6, m1
+    movq          m7, m1
+    pmaddwd       m6, [constsq+192]
+    pmaddwd       m7, [constsq+200]
+    paddd         m4, m6
+    paddd         m5, m7
+
+    movq          m6, m2
+    movq          m7, m2
+    pmaddwd       m6, [constsq+224]
+    pmaddwd       m7, [constsq+232]
+    paddd         m4, m6
+    paddd         m5, m7
+
+    movq          m6, m3
+    movq          m7, m3
+    pmaddwd       m6, [constsq+256]
+    pmaddwd       m7, [constsq+264]
+    paddd         m4, m6
+    paddd         m5, m7
+
+    movq          [outq  ], m4          ; out[0..1]
+    movq          [outq+8], m5          ; out[2..3]
+
+    movq          m5, m0    ; second half: m0/m5 accumulate out[4..7], m0..m3 are consumed
+    pmaddwd       m0, [constsq+176]
+    pmaddwd       m5, [constsq+184]
+
+    movq          m7, m1
+    pmaddwd       m1, [constsq+208]
+    pmaddwd       m7, [constsq+216]
+    paddd         m0, m1
+    paddd         m5, m7
+
+    movq          m7, m2
+    pmaddwd       m2, [constsq+240]
+    pmaddwd       m7, [constsq+248]
+    paddd         m0, m2
+    paddd         m5, m7
+
+    movq          m7, m3
+    pmaddwd       m3, [constsq+272]
+    pmaddwd       m7, [constsq+280]
+    paddd         m0, m3
+    paddd         m5, m7
+
+    movq          [outq+16], m0         ; out[4..5]
+    movq          [outq+24], m5         ; out[6..7]
+
+    RET
+
+
+;*******************************************************************
+;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+;                              uint32_t scale_factor[2][8],
+;                              int blocks, int channels, int subbands)
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_calc_scalefactors, 5, 9, 3, sb_sample_f, scale_factor, blocks, channels, subbands, ch, sb, sa, sf, blk
+    shl           channelsd, 5          ; channels * 32: end value for the byte offset kept in ch; NOTE(review): 10 register names are listed above but 9 are declared - confirm blk's register is preserved
+    mov           chq, 0
+.loop_1:                                ; one pass per channel, ch advances by 32 bytes (8 dwords)
+    lea           saq, [sb_sample_fq + chq]
+    lea           sfq, [scale_factorq + chq]
+
+    mov           sbd, 0
+.loop_2:                                ; one pass per pair of subbands
+    ; blk = (blocks - 1) * 64;
+    lea           blkq, [blocksq - 1]
+    shl           blkd, 6
+
+    movq          m0, [scale_mask]      ; start with bit 15 set in both dwords, so bsr below never sees 0
+.loop_3:                                ; walk the blocks from the last one down, 64 bytes apart
+    movq          m1, [saq+blkq]
+    pxor          m2, m2
+    pcmpgtd       m1, m2                ; m1 = -1 where the sample is > 0, else 0
+    paddd         m1, [saq+blkq]        ; sample - 1 for positive samples, sample otherwise
+    pcmpgtd       m2, m1                ; m2 = -1 where that value is negative
+    pxor          m1, m2                ; bitwise complement of the negative values
+
+    por           m0, m1                ; OR the result into the running mask
+
+    sub           blkd, 64
+    jns           .loop_3
+
+    movd          blkd, m0              ; low dword: mask of the first subband of the pair
+    psrlq         m0,   32              ; bring the second subband's mask down
+    bsr           blkd, blkd            ; index of the highest set bit
+    sub           blkd, 15    ; SCALE_OUT_BITS
+    mov           [sfq], blkd
+
+    movd          blkd, m0
+    bsr           blkd, blkd
+    sub           blkd, 15    ; SCALE_OUT_BITS
+    mov           [sfq+4], blkd
+
+    add           saq, 8                ; advance both pointers by two dwords
+    add           sfq, 8
+
+    add           sbd, 2
+    cmp           sbd, subbandsd
+    jl            .loop_2
+
+    add           chd, 32
+    cmp           chd, channelsd
+    jl            .loop_1
+
+    emms
+    RET
diff --git a/libavcodec/x86/sbcdsp_init.c b/libavcodec/x86/sbcdsp_init.c
new file mode 100644
index 0000000000..86effecfdf
--- /dev/null
+++ b/libavcodec/x86/sbcdsp_init.c
@@ -0,0 +1,51 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC MMX optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_mmx(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_mmx(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_mmx(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+
+/* Install the MMX routines when EXTERNAL_MMX() accepts the CPU flags. */
+av_cold void ff_sbcdsp_init_x86(SBCDSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!EXTERNAL_MMX(flags))
+        return;
+    s->sbc_analyze_4         = ff_sbc_analyze_4_mmx;
+    s->sbc_analyze_8         = ff_sbc_analyze_8_mmx;
+    s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_mmx;
+}
-- 
2.15.0



More information about the ffmpeg-devel mailing list