[FFmpeg-devel] [PATCHv2 2/2] lavu/lfg: switch to Ziggurat algorithm for normal random number generation (WIP)

Ganesh Ajjanagadde gajjanag at gmail.com
Sat Mar 12 14:58:41 CET 2016


Code taken from the Julia project, licensed under MIT:
https://github.com/JuliaLang/julia/blob/master/base/random.jl, in turn
derived from: "The Ziggurat Method for generating random variables" - Marsaglia and Tsang.

Paper and reference code: http://www.jstatsoft.org/v05/i08/. This is
well known to be the fastest method empirically for generating normal random
variables for a fixed PRNG source.

Note that there are a large number of implementations with
various tunings, this was one of the simpler ones and also has a friendly
license.

This results in ~ 3x speedup of random number generation:
old:
  15090 decicycles in av_bmg_get,       1 runs,      0 skips
  13140 decicycles in av_bmg_get,       2 runs,      0 skips
  10117 decicycles in av_bmg_get,       4 runs,      0 skips
  [...]
   2133 decicycles in av_bmg_get,  524268 runs,     20 skips=60.4x
   2134 decicycles in av_bmg_get, 1048531 runs,     45 skips=61.3x
   2135 decicycles in av_bmg_get, 2097061 runs,     91 skips=61.9x

new:
   7650 decicycles in av_gaussian_get,       1 runs,      0 skips
   5490 decicycles in av_gaussian_get,       2 runs,      0 skips
   3982 decicycles in av_gaussian_get,       4 runs,      0 skips
   [...]
    812 decicycles in av_gaussian_get,  524281 runs,      7 skips
    813 decicycles in av_gaussian_get, 1048563 runs,     13 skips
    813 decicycles in av_gaussian_get, 2097131 runs,     21 skips

and accordingly a ~2% speedup in aac encoding (-march=native, Haswell, clang):

old:
ffmpeg -f lavfi -i anoisesrc -t 300 -y sin_new.aac  5.30s user 0.02s system 99% cpu 5.322 total
new:
ffmpeg -f lavfi -i anoisesrc -t 300 -y sin_new.aac  5.16s user 0.03s system 99% cpu 5.198 total

Function added as av_gaussian_get with documentation, minor bumped.

Signed-off-by: Ganesh Ajjanagadde <gajjanag at gmail.com>
---
 libavcodec/aaccoder.c |   2 +-
 libavutil/lfg.c       | 304 +++++++++++++++++++++++++++++++++++++++++++++++++-
 libavutil/lfg.h       |   9 ++
 libavutil/version.h   |   2 +-
 4 files changed, 314 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index bca1f59..d44bb50 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -531,7 +531,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                 for (i = 0; i < sce->ics.swb_sizes[g]; i+=2) {
                     double rnd[2];
-                    av_bmg_get(&s->lfg, rnd);
+                    av_gaussian_get(&s->lfg, rnd, 2);
                     PNS[i+0] = (float)rnd[0];
                     PNS[i+1] = (float)rnd[1];
                 }
diff --git a/libavutil/lfg.c b/libavutil/lfg.c
index 5ffd76f..7163c6b 100644
--- a/libavutil/lfg.c
+++ b/libavutil/lfg.c
@@ -44,6 +44,302 @@ av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
     c->index = 0;
 }
 
+/*  Code adapted from the Julia project:
+    https://github.com/JuliaLang/julia/blob/master/base/random.jl.
+    This is licensed under MIT (https://github.com/JuliaLang/julia/blob/master/LICENSE.md)
+    Copyright (c) 2009-2016: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors:
+
+    https://github.com/JuliaLang/julia/contributors
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+    /* The Ziggurat Method for generating random variables - Marsaglia and Tsang
+    Paper and reference code: http://www.jstatsoft.org/v05/i08/. */
+    /* Tables for normal distribution */
+static const uint64_t ki[] = {
+    UINT64_C(0x0007799ec012f7b2),UINT64_C(0x0000000000000000),UINT64_C(0x0006045f4c7de363),UINT64_C(0x0006d1aa7d5ec0a5),
+    UINT64_C(0x000728fb3f60f777),UINT64_C(0x0007592af4e9fbc0),UINT64_C(0x000777a5c0bf655d),UINT64_C(0x00078ca3857d2256),
+    UINT64_C(0x00079bf6b0ffe58b),UINT64_C(0x0007a7a34ab092ad),UINT64_C(0x0007b0d2f20dd1cb),UINT64_C(0x0007b83d3aa9cb52),
+    UINT64_C(0x0007be597614224d),UINT64_C(0x0007c3788631abe9),UINT64_C(0x0007c7d32bc192ee),UINT64_C(0x0007cb9263a6e86d),
+    UINT64_C(0x0007ced483edfa84),UINT64_C(0x0007d1b07ac0fd39),UINT64_C(0x0007d437ef2da5fc),UINT64_C(0x0007d678b069aa6e),
+    UINT64_C(0x0007d87db38c5c87),UINT64_C(0x0007da4fc6a9ba62),UINT64_C(0x0007dbf611b37f3b),UINT64_C(0x0007dd7674d0f286),
+    UINT64_C(0x0007ded5ce8205f6),UINT64_C(0x0007e018307fb62b),UINT64_C(0x0007e141081bd124),UINT64_C(0x0007e2533d712de8),
+    UINT64_C(0x0007e3514bbd7718),UINT64_C(0x0007e43d54944b52),UINT64_C(0x0007e5192f25ef42),UINT64_C(0x0007e5e67481118d),
+    UINT64_C(0x0007e6a6897c1ce2),UINT64_C(0x0007e75aa6c7f64c),UINT64_C(0x0007e803df8ee498),UINT64_C(0x0007e8a326eb6272),
+    UINT64_C(0x0007e93954717a28),UINT64_C(0x0007e9c727f8648f),UINT64_C(0x0007ea4d4cc85a3c),UINT64_C(0x0007eacc5c4907a9),
+    UINT64_C(0x0007eb44e0474cf6),UINT64_C(0x0007ebb754e47419),UINT64_C(0x0007ec242a3d8474),UINT64_C(0x0007ec8bc5d69645),
+    UINT64_C(0x0007ecee83d3d6e9),UINT64_C(0x0007ed4cb8082f45),UINT64_C(0x0007eda6aee0170f),UINT64_C(0x0007edfcae2dfe68),
+    UINT64_C(0x0007ee4ef5dccd3e),UINT64_C(0x0007ee9dc08c394e),UINT64_C(0x0007eee9441a17c7),UINT64_C(0x0007ef31b21b4fb1),
+    UINT64_C(0x0007ef773846a8a7),UINT64_C(0x0007efba00d35a17),UINT64_C(0x0007effa32ccf69f),UINT64_C(0x0007f037f25e1278),
+    UINT64_C(0x0007f0736112d12c),UINT64_C(0x0007f0ac9e145c25),UINT64_C(0x0007f0e3c65e1fcc),UINT64_C(0x0007f118f4ed8e54),
+    UINT64_C(0x0007f14c42ed0dc8),UINT64_C(0x0007f17dc7daa0c3),UINT64_C(0x0007f1ad99aac6a5),UINT64_C(0x0007f1dbcce80015),
+    UINT64_C(0x0007f20874cf56bf),UINT64_C(0x0007f233a36a3b9a),UINT64_C(0x0007f25d69a604ad),UINT64_C(0x0007f285d7694a92),
+    UINT64_C(0x0007f2acfba75e3b),UINT64_C(0x0007f2d2e4720909),UINT64_C(0x0007f2f79f09c344),UINT64_C(0x0007f31b37ec883b),
+    UINT64_C(0x0007f33dbae36abc),UINT64_C(0x0007f35f330f08d5),UINT64_C(0x0007f37faaf2fa79),UINT64_C(0x0007f39f2c805380),
+    UINT64_C(0x0007f3bdc11f4f1c),UINT64_C(0x0007f3db71b83850),UINT64_C(0x0007f3f846bba121),UINT64_C(0x0007f4144829f846),
+    UINT64_C(0x0007f42f7d9a8b9d),UINT64_C(0x0007f449ee420432),UINT64_C(0x0007f463a0f8675e),UINT64_C(0x0007f47c9c3ea77b),
+    UINT64_C(0x0007f494e643cd8e),UINT64_C(0x0007f4ac84e9c475),UINT64_C(0x0007f4c37dc9cd50),UINT64_C(0x0007f4d9d638a432),
+    UINT64_C(0x0007f4ef934a5b6a),UINT64_C(0x0007f504b9d5f33d),UINT64_C(0x0007f5194e78b352),UINT64_C(0x0007f52d55994a96),
+    UINT64_C(0x0007f540d36aba0c),UINT64_C(0x0007f553cbef0e77),UINT64_C(0x0007f56642f9ec8f),UINT64_C(0x0007f5783c32f31e),
+    UINT64_C(0x0007f589bb17f609),UINT64_C(0x0007f59ac2ff1525),UINT64_C(0x0007f5ab5718b15a),UINT64_C(0x0007f5bb7a71427c),
+    UINT64_C(0x0007f5cb2ff31009),UINT64_C(0x0007f5da7a67cebe),UINT64_C(0x0007f5e95c7a24e7),UINT64_C(0x0007f5f7d8b7171e),
+    UINT64_C(0x0007f605f18f5ef4),UINT64_C(0x0007f613a958ad0a),UINT64_C(0x0007f621024ed7e9),UINT64_C(0x0007f62dfe94f8cb),
+    UINT64_C(0x0007f63aa036777a),UINT64_C(0x0007f646e928065a),UINT64_C(0x0007f652db488f88),UINT64_C(0x0007f65e786213ff),
+    UINT64_C(0x0007f669c22a7d8a),UINT64_C(0x0007f674ba446459),UINT64_C(0x0007f67f623fc8db),UINT64_C(0x0007f689bb9ac294),
+    UINT64_C(0x0007f693c7c22481),UINT64_C(0x0007f69d881217a6),UINT64_C(0x0007f6a6fdd6ac36),UINT64_C(0x0007f6b02a4c61ee),
+    UINT64_C(0x0007f6b90ea0a7f4),UINT64_C(0x0007f6c1abf254c0),UINT64_C(0x0007f6ca03521664),UINT64_C(0x0007f6d215c2db82),
+    UINT64_C(0x0007f6d9e43a3559),UINT64_C(0x0007f6e16fa0b329),UINT64_C(0x0007f6e8b8d23729),UINT64_C(0x0007f6efc09e4569),
+    UINT64_C(0x0007f6f687c84cbf),UINT64_C(0x0007f6fd0f07ea09),UINT64_C(0x0007f703570925e2),UINT64_C(0x0007f709606cad03),
+    UINT64_C(0x0007f70f2bc8036f),UINT64_C(0x0007f714b9a5b292),UINT64_C(0x0007f71a0a85725d),UINT64_C(0x0007f71f1edc4d9e),
+    UINT64_C(0x0007f723f714c179),UINT64_C(0x0007f728938ed843),UINT64_C(0x0007f72cf4a03fa0),UINT64_C(0x0007f7311a945a16),
+    UINT64_C(0x0007f73505ac4bf8),UINT64_C(0x0007f738b61f03bd),UINT64_C(0x0007f73c2c193dc0),UINT64_C(0x0007f73f67bd835c),
+    UINT64_C(0x0007f74269242559),UINT64_C(0x0007f745305b31a1),UINT64_C(0x0007f747bd666428),UINT64_C(0x0007f74a103f12ed),
+    UINT64_C(0x0007f74c28d414f5),UINT64_C(0x0007f74e0709a42d),UINT64_C(0x0007f74faab939f9),UINT64_C(0x0007f75113b16657),
+    UINT64_C(0x0007f75241b5a155),UINT64_C(0x0007f753347e16b8),UINT64_C(0x0007f753ebb76b7c),UINT64_C(0x0007f75467027d05),
+    UINT64_C(0x0007f754a5f4199d),UINT64_C(0x0007f754a814b207),UINT64_C(0x0007f7546ce003ae),UINT64_C(0x0007f753f3c4bb29),
+    UINT64_C(0x0007f7533c240e92),UINT64_C(0x0007f75245514f41),UINT64_C(0x0007f7510e91726c),UINT64_C(0x0007f74f971a9012),
+    UINT64_C(0x0007f74dde135797),UINT64_C(0x0007f74be2927971),UINT64_C(0x0007f749a39e051c),UINT64_C(0x0007f747202aba8a),
+    UINT64_C(0x0007f744571b4e3c),UINT64_C(0x0007f741473f9efe),UINT64_C(0x0007f73def53dc43),UINT64_C(0x0007f73a4dff9bff),
+    UINT64_C(0x0007f73661d4deaf),UINT64_C(0x0007f732294f003f),UINT64_C(0x0007f72da2d19444),UINT64_C(0x0007f728cca72bda),
+    UINT64_C(0x0007f723a5000367),UINT64_C(0x0007f71e29f09627),UINT64_C(0x0007f7185970156b),UINT64_C(0x0007f7123156c102),
+    UINT64_C(0x0007f70baf5c1e2c),UINT64_C(0x0007f704d1150a23),UINT64_C(0x0007f6fd93f1a4e5),UINT64_C(0x0007f6f5f53b10b6),
+    UINT64_C(0x0007f6edf211023e),UINT64_C(0x0007f6e587671ce9),UINT64_C(0x0007f6dcb2021679),UINT64_C(0x0007f6d36e749c64),
+    UINT64_C(0x0007f6c9b91bf4c6),UINT64_C(0x0007f6bf8e1c541b),UINT64_C(0x0007f6b4e95ce015),UINT64_C(0x0007f6a9c68356ff),
+    UINT64_C(0x0007f69e20ef5211),UINT64_C(0x0007f691f3b517eb),UINT64_C(0x0007f6853997f321),UINT64_C(0x0007f677ed03ff19),
+    UINT64_C(0x0007f66a08075bdc),UINT64_C(0x0007f65b844ab75a),UINT64_C(0x0007f64c5b091860),UINT64_C(0x0007f63c8506d4bc),
+    UINT64_C(0x0007f62bfa8798fe),UINT64_C(0x0007f61ab34364b0),UINT64_C(0x0007f608a65a599a),UINT64_C(0x0007f5f5ca4737e8),
+    UINT64_C(0x0007f5e214d05b48),UINT64_C(0x0007f5cd7af7066e),UINT64_C(0x0007f5b7f0e4c2a1),UINT64_C(0x0007f5a169d68fcf),
+    UINT64_C(0x0007f589d80596a5),UINT64_C(0x0007f5712c8d0174),UINT64_C(0x0007f557574c912b),UINT64_C(0x0007f53c46c77193),
+    UINT64_C(0x0007f51fe7feb9f2),UINT64_C(0x0007f5022646ecfb),UINT64_C(0x0007f4e2eb17ab1d),UINT64_C(0x0007f4c21dd4a3d1),
+    UINT64_C(0x0007f49fa38ea394),UINT64_C(0x0007f47b5ebb62eb),UINT64_C(0x0007f4552ee27473),UINT64_C(0x0007f42cf03d58f5),
+    UINT64_C(0x0007f4027b48549f),UINT64_C(0x0007f3d5a44119df),UINT64_C(0x0007f3a63a8fb552),UINT64_C(0x0007f37408155100),
+    UINT64_C(0x0007f33ed05b55ec),UINT64_C(0x0007f3064f9c183e),UINT64_C(0x0007f2ca399c7ba1),UINT64_C(0x0007f28a384bb940),
+    UINT64_C(0x0007f245ea1b7a2b),UINT64_C(0x0007f1fcdffe8f1b),UINT64_C(0x0007f1ae9af758cd),UINT64_C(0x0007f15a8917f27e),
+    UINT64_C(0x0007f10001ccaaab),UINT64_C(0x0007f09e413c418a),UINT64_C(0x0007f034627733d7),UINT64_C(0x0007efc15815b8d5),
+    UINT64_C(0x0007ef43e2bf7f55),UINT64_C(0x0007eeba84e31dfe),UINT64_C(0x0007ee237294df89),UINT64_C(0x0007ed7c7c170141),
+    UINT64_C(0x0007ecc2f0d95d3a),UINT64_C(0x0007ebf377a46782),UINT64_C(0x0007eb09d6deb285),UINT64_C(0x0007ea00a4f17808),
+    UINT64_C(0x0007e8d0d3da63d6),UINT64_C(0x0007e771023b0fcf),UINT64_C(0x0007e5d46c2f08d8),UINT64_C(0x0007e3e937669691),
+    UINT64_C(0x0007e195978f1176),UINT64_C(0x0007deb2c0e05c1c),UINT64_C(0x0007db0362002a19),UINT64_C(0x0007d6202c151439),
+    UINT64_C(0x0007cf4b8f00a2cb),UINT64_C(0x0007c4fd24520efd),UINT64_C(0x0007b362fbf81816),UINT64_C(0x00078d2d25998e24),
+};
+
+static const double wi[] = {
+    1.7367254121602630e-15,9.5586603514556339e-17,1.2708704834810623e-16,
+    1.4909740962495474e-16,1.6658733631586268e-16,1.8136120810119029e-16,
+    1.9429720153135588e-16,2.0589500628482093e-16,2.1646860576895422e-16,
+    2.2622940392218116e-16,2.3532718914045892e-16,2.4387234557428771e-16,
+    2.5194879829274225e-16,2.5962199772528103e-16,2.6694407473648285e-16,
+    2.7395729685142446e-16,2.8069646002484804e-16,2.8719058904113930e-16,
+    2.9346417484728883e-16,2.9953809336782113e-16,3.0543030007192440e-16,
+    3.1115636338921572e-16,3.1672988018581815e-16,3.2216280350549905e-16,
+    3.2746570407939751e-16,3.3264798116841710e-16,3.3771803417353232e-16,
+    3.4268340353119356e-16,3.4755088731729758e-16,3.5232663846002031e-16,
+    3.5701624633953494e-16,3.6162480571598339e-16,3.6615697529653540e-16,
+    3.7061702777236077e-16,3.7500889278747798e-16,3.7933619401549554e-16,
+    3.8360228129677279e-16,3.8781025861250247e-16,3.9196300853257678e-16,
+    3.9606321366256378e-16,4.0011337552546690e-16,4.0411583124143332e-16,
+    4.0807276830960448e-16,4.1198623774807442e-16,4.1585816580828064e-16,
+    4.1969036444740733e-16,4.2348454071520708e-16,4.2724230518899761e-16,
+    4.3096517957162941e-16,4.3465460355128760e-16,4.3831194100854571e-16,
+    4.4193848564470665e-16,4.4553546609579137e-16,4.4910405058828750e-16,
+    4.5264535118571397e-16,4.5616042766900381e-16,4.5965029108849407e-16,
+    4.6311590702081647e-16,4.6655819856008752e-16,4.6997804906941950e-16,
+    4.7337630471583237e-16,4.7675377680908526e-16,4.8011124396270155e-16,
+    4.8344945409350080e-16,4.8676912627422087e-16,4.9007095245229938e-16,
+    4.9335559904654139e-16,4.9662370843221783e-16,4.9987590032409088e-16,
+    5.0311277306593187e-16,5.0633490483427195e-16,5.0954285476338923e-16,
+    5.1273716399787966e-16,5.1591835667857364e-16,5.1908694086703434e-16,
+    5.2224340941340417e-16,5.2538824077194543e-16,5.2852189976823820e-16,
+    5.3164483832166176e-16,5.3475749612647295e-16,5.3786030129452348e-16,
+    5.4095367096239933e-16,5.4403801186554671e-16,5.4711372088173611e-16,
+    5.5018118554603362e-16,5.5324078453927836e-16,5.5629288815190902e-16,
+    5.5933785872484621e-16,5.6237605106900435e-16,5.6540781286489604e-16,
+    5.6843348504368141e-16,5.7145340215092040e-16,5.7446789269419609e-16,
+    5.7747727947569648e-16,5.8048187991076857e-16,5.8348200633338921e-16,
+    5.8647796628943653e-16,5.8947006281858718e-16,5.9245859472561339e-16,
+    5.9544385684180598e-16,5.9842614027720281e-16,6.0140573266426640e-16,
+    6.0438291839361250e-16,6.0735797884236057e-16,6.1033119259564394e-16,
+    6.1330283566179110e-16,6.1627318168165963e-16,6.1924250213258470e-16,
+    6.2221106652737879e-16,6.2517914260879998e-16,6.2814699653988953e-16,
+    6.3111489309056042e-16,6.3408309582080600e-16,6.3705186726088149e-16,
+    6.4002146908880247e-16,6.4299216230548961e-16,6.4596420740788321e-16,
+    6.4893786456033965e-16,6.5191339376461587e-16,6.5489105502874154e-16,
+    6.5787110853507413e-16,6.6085381480782587e-16,6.6383943488035057e-16,
+    6.6682823046247459e-16,6.6982046410815579e-16,6.7281639938375311e-16,
+    6.7581630103719006e-16,6.7882043516829803e-16,6.8182906940062540e-16,
+    6.8484247305500383e-16,6.8786091732516637e-16,6.9088467545571690e-16,
+    6.9391402292275690e-16,6.9694923761748294e-16,6.9999060003307640e-16,
+    7.0303839345521508e-16,7.0609290415654822e-16,7.0915442159548734e-16,
+    7.1222323861967788e-16,7.1529965167453030e-16,7.1838396101720629e-16,
+    7.2147647093647067e-16,7.2457748997883870e-16,7.2768733118146927e-16,
+    7.3080631231227429e-16,7.3393475611774048e-16,7.3707299057898310e-16,
+    7.4022134917657997e-16,7.4338017116476479e-16,7.4654980185558890e-16,
+    7.4973059291369793e-16,7.5292290266240584e-16,7.5612709640179217e-16,
+    7.5934354673958895e-16,7.6257263393567558e-16,7.6581474626104873e-16,
+    7.6907028037219191e-16,7.7233964170182985e-16,7.7562324486711744e-16,
+    7.7892151409638524e-16,7.8223488367564108e-16,7.8556379841610841e-16,
+    7.8890871414417552e-16,7.9227009821522709e-16,7.9564843005293662e-16,
+    7.9904420171571300e-16,8.0245791849212591e-16,8.0589009952726568e-16,
+    8.0934127848215009e-16,8.1281200422845008e-16,8.1630284158098775e-16,
+    8.1981437207065329e-16,8.2334719476060504e-16,8.2690192710884700e-16,
+    8.3047920588053737e-16,8.3407968811366288e-16,8.3770405214202216e-16,
+    8.4135299867980282e-16,8.4502725197240968e-16,8.4872756101861549e-16,
+    8.5245470086955962e-16,8.5620947401062333e-16,8.5999271183276646e-16,
+    8.6380527620052589e-16,8.6764806112455816e-16,8.7152199454736980e-16,
+    8.7542804025171749e-16,8.7936719990210427e-16,8.8334051523084080e-16,
+    8.8734907038131345e-16,8.9139399442240861e-16,8.9547646404950677e-16,
+    8.9959770648910994e-16,9.0375900262601175e-16,9.0796169037400680e-16,
+    9.1220716831348461e-16,9.1649689962191353e-16,9.2083241632623076e-16,
+    9.2521532390956933e-16,9.2964730630864167e-16,9.3413013134252651e-16,
+    9.3866565661866598e-16,9.4325583596767065e-16,9.4790272646517382e-16,
+    9.5260849610662787e-16,9.5737543220974496e-16,9.6220595062948384e-16,
+    9.6710260588230542e-16,9.7206810229016259e-16,9.7710530627072088e-16,
+    9.8221725991905411e-16,9.8740719604806711e-16,9.9267855488079765e-16,
+    9.9803500261836449e-16,1.0034804521436181e-15,1.0090190861637457e-15,
+    1.0146553831467086e-15,1.0203941464683124e-15,1.0262405372613567e-15,
+    1.0322001115486456e-15,1.0382788623515399e-15,1.0444832676000471e-15,
+    1.0508203448355195e-15,1.0572977139009890e-15,1.0639236690676801e-15,
+    1.0707072623632994e-15,1.0776584002668106e-15,1.0847879564403425e-15,
+    1.0921079038149563e-15,1.0996314701785628e-15,1.1073733224935752e-15,
+    1.1153497865853155e-15,1.1235791107110833e-15,1.1320817840164846e-15,
+    1.1408809242582780e-15,1.1500027537839792e-15,1.1594771891449189e-15,
+    1.1693385786910960e-15,1.1796266352955801e-15,1.1903876299282890e-15,
+    1.2016759392543819e-15,1.2135560818666897e-15,1.2261054417450561e-15,
+    1.2394179789163251e-15,1.2536093926602567e-15,1.2688244814255010e-15,
+    1.2852479319096109e-15,1.3031206634689985e-15,1.3227655770195326e-15,
+    1.3446300925011171e-15,1.3693606835128518e-15,1.3979436672775240e-15,
+    1.4319989869661328e-15,1.4744848603597596e-15,1.5317872741611144e-15,
+    1.6227698675312968e-15
+};
+
+static const double fi[] = {
+    1.0000000000000000e+00,9.7710170126767082e-01,9.5987909180010600e-01,
+    9.4519895344229909e-01,9.3206007595922991e-01,9.1999150503934646e-01,
+    9.0872644005213032e-01,8.9809592189834297e-01,8.8798466075583282e-01,
+    8.7830965580891684e-01,8.6900868803685649e-01,8.6003362119633109e-01,
+    8.5134625845867751e-01,8.4291565311220373e-01,8.3471629298688299e-01,
+    8.2672683394622093e-01,8.1892919160370192e-01,8.1130787431265572e-01,
+    8.0384948317096383e-01,7.9654233042295841e-01,7.8937614356602404e-01,
+    7.8234183265480195e-01,7.7543130498118662e-01,7.6863731579848571e-01,
+    7.6195334683679483e-01,7.5537350650709567e-01,7.4889244721915638e-01,
+    7.4250529634015061e-01,7.3620759812686210e-01,7.2999526456147568e-01,
+    7.2386453346862967e-01,7.1781193263072152e-01,7.1183424887824798e-01,
+    7.0592850133275376e-01,7.0009191813651117e-01,6.9432191612611627e-01,
+    6.8861608300467136e-01,6.8297216164499430e-01,6.7738803621877308e-01,
+    6.7186171989708166e-01,6.6639134390874977e-01,6.6097514777666277e-01,
+    6.5561147057969693e-01,6.5029874311081637e-01,6.4503548082082196e-01,
+    6.3982027745305614e-01,6.3465179928762327e-01,6.2952877992483625e-01,
+    6.2445001554702606e-01,6.1941436060583399e-01,6.1442072388891344e-01,
+    6.0946806492577310e-01,6.0455539069746733e-01,5.9968175261912482e-01,
+    5.9484624376798689e-01,5.9004799633282545e-01,5.8528617926337090e-01,
+    5.8055999610079034e-01,5.7586868297235316e-01,5.7121150673525267e-01,
+    5.6658776325616389e-01,5.6199677581452390e-01,5.5743789361876550e-01,
+    5.5291049042583185e-01,5.4841396325526537e-01,5.4394773119002582e-01,
+    5.3951123425695158e-01,5.3510393238045717e-01,5.3072530440366150e-01,
+    5.2637484717168403e-01,5.2205207467232140e-01,5.1775651722975591e-01,
+    5.1348772074732651e-01,5.0924524599574761e-01,5.0502866794346790e-01,
+    5.0083757512614835e-01,4.9667156905248933e-01,4.9253026364386815e-01,
+    4.8841328470545758e-01,4.8432026942668288e-01,4.8025086590904642e-01,
+    4.7620473271950547e-01,4.7218153846772976e-01,4.6818096140569321e-01,
+    4.6420268904817391e-01,4.6024641781284248e-01,4.5631185267871610e-01,
+    4.5239870686184824e-01,4.4850670150720273e-01,4.4463556539573912e-01,
+    4.4078503466580377e-01,4.3695485254798533e-01,4.3314476911265209e-01,
+    4.2935454102944126e-01,4.2558393133802180e-01,4.2183270922949573e-01,
+    4.1810064983784795e-01,4.1438753404089090e-01,4.1069314827018799e-01,
+    4.0701728432947315e-01,4.0335973922111429e-01,3.9972031498019700e-01,
+    3.9609881851583223e-01,3.9249506145931540e-01,3.8890886001878855e-01,
+    3.8534003484007706e-01,3.8178841087339344e-01,3.7825381724561896e-01,
+    3.7473608713789086e-01,3.7123505766823922e-01,3.6775056977903225e-01,
+    3.6428246812900372e-01,3.6083060098964775e-01,3.5739482014578022e-01,
+    3.5397498080007656e-01,3.5057094148140588e-01,3.4718256395679348e-01,
+    3.4380971314685055e-01,3.4045225704452164e-01,3.3711006663700588e-01,
+    3.3378301583071823e-01,3.3047098137916342e-01,3.2717384281360129e-01,
+    3.2389148237639104e-01,3.2062378495690530e-01,3.1737063802991350e-01,
+    3.1413193159633707e-01,3.1090755812628634e-01,3.0769741250429189e-01,
+    3.0450139197664983e-01,3.0131939610080288e-01,2.9815132669668531e-01,
+    2.9499708779996164e-01,2.9185658561709499e-01,2.8872972848218270e-01,
+    2.8561642681550159e-01,2.8251659308370741e-01,2.7943014176163772e-01,
+    2.7635698929566810e-01,2.7329705406857691e-01,2.7025025636587519e-01,
+    2.6721651834356114e-01,2.6419576399726080e-01,2.6118791913272082e-01,
+    2.5819291133761890e-01,2.5521066995466168e-01,2.5224112605594190e-01,
+    2.4928421241852824e-01,2.4633986350126363e-01,2.4340801542275012e-01,
+    2.4048860594050039e-01,2.3758157443123795e-01,2.3468686187232990e-01,
+    2.3180441082433859e-01,2.2893416541468023e-01,2.2607607132238020e-01,
+    2.2323007576391746e-01,2.2039612748015194e-01,2.1757417672433113e-01,
+    2.1476417525117358e-01,2.1196607630703015e-01,2.0917983462112499e-01,
+    2.0640540639788071e-01,2.0364274931033485e-01,2.0089182249465656e-01,
+    1.9815258654577511e-01,1.9542500351413428e-01,1.9270903690358912e-01,
+    1.9000465167046496e-01,1.8731181422380025e-01,1.8463049242679927e-01,
+    1.8196065559952254e-01,1.7930227452284767e-01,1.7665532144373500e-01,
+    1.7401977008183875e-01,1.7139559563750595e-01,1.6878277480121151e-01,
+    1.6618128576448205e-01,1.6359110823236570e-01,1.6101222343751107e-01,
+    1.5844461415592431e-01,1.5588826472447920e-01,1.5334316106026283e-01,
+    1.5080929068184568e-01,1.4828664273257453e-01,1.4577520800599403e-01,
+    1.4327497897351341e-01,1.4078594981444470e-01,1.3830811644855071e-01,
+    1.3584147657125373e-01,1.3338602969166913e-01,1.3094177717364430e-01,
+    1.2850872227999952e-01,1.2608687022018586e-01,1.2367622820159654e-01,
+    1.2127680548479021e-01,1.1888861344290998e-01,1.1651166562561080e-01,
+    1.1414597782783835e-01,1.1179156816383801e-01,1.0944845714681163e-01,
+    1.0711666777468364e-01,1.0479622562248690e-01,1.0248715894193508e-01,
+    1.0018949876880981e-01,9.7903279038862284e-02,9.5628536713008819e-02,
+    9.3365311912690860e-02,9.1113648066373634e-02,8.8873592068275789e-02,
+    8.6645194450557961e-02,8.4428509570353374e-02,8.2223595813202863e-02,
+    8.0030515814663056e-02,7.7849336702096039e-02,7.5680130358927067e-02,
+    7.3522973713981268e-02,7.1377949058890375e-02,6.9245144397006769e-02,
+    6.7124653827788497e-02,6.5016577971242842e-02,6.2921024437758113e-02,
+    6.0838108349539864e-02,5.8767952920933758e-02,5.6710690106202902e-02,
+    5.4666461324888914e-02,5.2635418276792176e-02,5.0617723860947761e-02,
+    4.8613553215868521e-02,4.6623094901930368e-02,4.4646552251294443e-02,
+    4.2684144916474431e-02,4.0736110655940933e-02,3.8802707404526113e-02,
+    3.6884215688567284e-02,3.4980941461716084e-02,3.3093219458578522e-02,
+    3.1221417191920245e-02,2.9365939758133314e-02,2.7527235669603082e-02,
+    2.5705804008548896e-02,2.3902203305795882e-02,2.2117062707308864e-02,
+    2.0351096230044517e-02,1.8605121275724643e-02,1.6880083152543166e-02,
+    1.5177088307935325e-02,1.3497450601739880e-02,1.1842757857907888e-02,
+    1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03,
+    5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03,
+    1.2602859304985975e-03
+};
+
+static const double ziggurat_nor_r      = 3.6541528853610088;
+static const double ziggurat_nor_inv_r  = 0.2736612373297583;
+
+static inline double uniform(AVLFG *lfg)
+{
+    return (double)av_lfg_get(lfg) / UINT_MAX;
+}
+
+static inline double ziggurat(AVLFG *lfg)
+{
+    while (1) {
+        uint64_t r = (((uint64_t)av_lfg_get(lfg) << 32) + av_lfg_get(lfg)) & 0x000fffffffffffff;
+        int64_t rabs = (int64_t) (r >> 1);
+        uint8_t idx = rabs & 0xff;
+        double x = ((r & 1) ? -rabs : rabs) * wi[idx];
+        if (rabs < ki[idx])
+            return x;
+        else {
+            if (!idx) {
+                while(1) {
+                    double xx = -ziggurat_nor_inv_r * log(uniform(lfg));
+                    double yy = -log(uniform(lfg));
+                    if (2 * yy > xx * xx)
+                        return ((rabs >> 8) & 1) ? -ziggurat_nor_r - xx : ziggurat_nor_r + xx;
+                }
+            }
+            else if ((fi[idx-1] - fi[idx]) * uniform(lfg) + fi[idx] < exp(-0.5*x*x))
+                return x;
+            else
+                continue;
+        }
+    }
+}
+
 void av_bmg_get(AVLFG *lfg, double out[2])
 {
     double x1, x2, w;
@@ -59,6 +355,12 @@ void av_bmg_get(AVLFG *lfg, double out[2])
     out[1] = x2 * w;
 }
 
+void av_gaussian_get(AVLFG *lfg, double *out, int len)
+{
+    for (int i = 0; i < len; ++i)
+        out[i] = ziggurat(lfg);
+}
+
 #ifdef TEST
 #include "log.h"
 #include "timer.h"
@@ -91,7 +393,7 @@ int main(void)
 
         for (i = 0; i < 1000; i += 2) {
             double bmg_out[2];
-            av_bmg_get(&state, bmg_out);
+            av_gaussian_get(&state, bmg_out, 2);
             samp0 = bmg_out[0] * stddev + mean;
             samp1 = bmg_out[1] * stddev + mean;
             samp_mean += samp0 + samp1;
diff --git a/libavutil/lfg.h b/libavutil/lfg.h
index ec90562..0a74b86 100644
--- a/libavutil/lfg.h
+++ b/libavutil/lfg.h
@@ -59,4 +59,13 @@ static inline unsigned int av_mlfg_get(AVLFG *c){
  */
 void av_bmg_get(AVLFG *lfg, double out[2]);
 
+/**
+ * Get numbers generated independently from a standard Gaussian distribution
+ * generator using the random numbers issued by lfg.
+ *
+ * @param len number of Gaussian samples
+ * @param out array where the generated samples are placed
+ */
+void av_gaussian_get(AVLFG *lfg, double *out, int len);
+
 #endif /* AVUTIL_LFG_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 53e74bc..f1e9e40 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -64,7 +64,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  55
-#define LIBAVUTIL_VERSION_MINOR  19
+#define LIBAVUTIL_VERSION_MINOR  20
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
-- 
2.7.2



More information about the ffmpeg-devel mailing list