diff --git a/Makefile b/Makefile index 516670e..126d98b 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,7 @@ endif COMMON_SOURCES = \ common/common_block.c \ common/common_frame.c \ + common/entcode.c \ common/transform.c \ common/intra_prediction.c \ common/inter_prediction.c \ @@ -31,6 +32,7 @@ COMMON_SOURCES = \ ENCODER_SOURCES = \ enc/encode_block.c \ enc/encode_frame.c \ + enc/entenc.c \ enc/mainenc.c \ enc/putbits.c \ enc/putvlc.c \ @@ -41,6 +43,7 @@ ENCODER_SOURCES = \ DECODER_SOURCES = \ dec/decode_block.c \ + dec/entdec.c \ dec/getbits.c \ dec/getvlc.c \ dec/maindec.c \ diff --git a/common/entcode.c b/common/entcode.c new file mode 100644 index 0000000..28faf87 --- /dev/null +++ b/common/entcode.c @@ -0,0 +1,91 @@ +/*Daala video codec +Copyright (c) 2001-2012 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "entcode.h" + +/*CDFs for uniform probability distributions of small sizes (2 through 16, + inclusive).*/ +const uint16_t OD_UNIFORM_CDFS_Q15[135] = { + 16384, 32768, + 10923, 21845, 32768, + 8192, 16384, 24576, 32768, + 6554, 13107, 19661, 26214, 32768, + 5461, 10923, 16384, 21845, 27307, 32768, + 4681, 9362, 14043, 18725, 23406, 28087, 32768, + 4096, 8192, 12288, 16384, 20480, 24576, 28672, 32768, + 3641, 7282, 10923, 14564, 18204, 21845, 25486, 29127, 32768, + 3277, 6554, 9830, 13107, 16384, 19661, 22938, 26214, 29491, 32768, + 2979, 5958, 8937, 11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768, + 2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037, + 32768, + 2521, 5041, 7562, 10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727, + 30247, 32768, + 2341, 4681, 7022, 9362, 11703, 14043, 16384, 18725, 21065, 23406, 25746, + 28087, 30427, 32768, + 2185, 4369, 6554, 8738, 10923, 13107, 15292, 17476, 19661, 21845, 24030, + 26214, 28399, 30583, 32768, + 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, 20480, 22528, + 24576, 26624, 28672, 30720, 32768 +}; + +/*Given the current total integer number of bits used and the current value of + rng, computes the fractional number of bits used to OD_BITRES precision. + This is used by od_ec_enc_tell_frac() and od_ec_dec_tell_frac(). 
+ nbits_total: The number of whole bits currently used, i.e., the value + returned by od_ec_enc_tell() or od_ec_dec_tell(). + rng: The current value of rng from either the encoder or decoder state. + Return: The number of bits scaled by 2**OD_BITRES. + This will always be slightly larger than the exact value (i.e., all + rounding error is in the positive direction).*/ +uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) { + uint32_t nbits; + int l; + int i; + /*To handle the non-integral number of bits still left in the encoder/decoder + state, we compute the worst-case number of bits of val that must be + encoded to ensure that the value is inside the range for any possible + subsequent bits. + The computation here is independent of val itself (the decoder does not + even track that value), even though the real number of bits used after + od_ec_enc_done() may be 1 smaller if rng is a power of two and the + corresponding trailing bits of val are all zeros. + If we did try to track that special case, then coding a value with a + probability of 1/(1 << n) might sometimes appear to use more than n bits. + This may help explain the surprising result that a newly initialized + encoder or decoder claims to have used 1 bit.*/ + nbits = nbits_total << OD_BITRES; + l = 0; + for (i = OD_BITRES; i-- > 0;) { + int b; + rng = rng*rng >> 15; + b = (int)(rng >> 16); + l = l << 1 | b; + rng >>= b; + } + return nbits - l; +} diff --git a/common/entcode.h b/common/entcode.h new file mode 100644 index 0000000..eed6c6f --- /dev/null +++ b/common/entcode.h @@ -0,0 +1,120 @@ +/*Daala video codec +Copyright (c) 2001-2013 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#if !defined(_entcode_H) +# define _entcode_H (1) +# include +# include +# include +# include "odintrin.h" + +/*Set this flag to 1 to enable a "reduced overhead" version of the entropy coder. + This uses a partition function that more accurately follows the input + probability estimates at the expense of some additional CPU cost (though + still an order of magnitude less than a full division). + + In classic arithmetic coding, the partition function maps a value x in the + range [0, ft] to a value y in [0, r] with 0 < ft <= r via + y = x*r/ft. + Any deviation from this value increases coding inefficiency. + + To avoid divisions, we require ft <= r < 2*ft (enforcing it by shifting up + ft if necessary), and replace that function with + y = x + OD_MINI(x, r - ft). 
+ This counts values of x smaller than r - ft double compared to values larger + than r - ft, which over-estimates the probability of symbols at the start of + the alphabet, and under-estimates the probability of symbols at the end of + the alphabet. + The overall coding inefficiency assuming accurate probability models and + independent symbols is in the 1% range, which is similar to that of CABAC. + + To reduce overhead even further, we split this into two cases: + 1) r - ft > ft - (r - ft). + That is, we have more values of x that are double-counted than + single-counted. + In this case, we still double-count the first 2*r - 3*ft values of x, but + after that we alternate between single-counting and double-counting for + the rest. + 2) r - ft < ft - (r - ft). + That is, we have more values of x that are single-counted than + double-counted. + In this case, we alternate between single-counting and double-counting for + the first 2*(r - ft) values of x, and single-count the rest. + For two equiprobable symbols in different places in the alphabet, this + reduces the maximum ratio of over-estimation to under-estimation from 2:1 + for the previous partition function to either 4:3 or 3:2 (for each of the + two cases above, respectively), assuming symbol probabilities significantly + greater than 1/32768. + That reduces the worst-case per-symbol overhead from 1 bit to 0.58 bits. + + The resulting function is + e = OD_MAXI(2*r - 3*ft, 0); + y = x + OD_MINI(x, e) + OD_MINI(OD_MAXI(x - e, 0) >> 1, r - ft). + Here, e is a value that is greater than 0 in case 1, and 0 in case 2. + This function is about 3 times as expensive to evaluate as the high-overhead + version, but still an order of magnitude cheaper than a division, since it + is composed only of very simple operations. + Because we want to fit in 16-bit registers and must use unsigned values to do + so, we use saturating subtraction to enforce the maximums with 0. 
+ + Enabling this reduces the measured overhead in ectest from 0.805% to 0.621% + (vs. 0.022% for the division-based partition function with r much greater + than ft). + It improves performance on ntt-short-1 by about 0.3%.*/ +# define OD_EC_REDUCED_OVERHEAD (0) + +/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic + on a larger type, you can speed up the decoder by using it here.*/ +typedef uint32_t od_ec_window; + +# define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window)*CHAR_BIT) + +/*Unsigned subtraction with unsigned saturation. + This implementation of the macro is intentionally chosen to increase the + number of common subexpressions in the reduced-overhead partition function. + This matters for C code, but it would not for hardware with a saturating + subtraction instruction.*/ +#define OD_SUBSATU(a, b) ((a) - OD_MINI(a, b)) + +/*The number of bits to use for the range-coded part of unsigned integers.*/ +# define OD_EC_UINT_BITS (4) + +/*The resolution of fractional-precision bit usage measurements, i.e., + 3 => 1/8th bits.*/ +# define OD_BITRES (3) + +extern const uint16_t OD_UNIFORM_CDFS_Q15[135]; + +/*Returns a Q15 CDF for a uniform probability distribution of the given size. + n: The size of the distribution. 
+ This must be at least 2, and no more than 16.*/ +# define OD_UNIFORM_CDF_Q15(n) \ + (OD_UNIFORM_CDFS_Q15 + ((n)*((n) - 1) >> 1) - 1) + +/*See entcode.c for further documentation.*/ + +OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total, + uint32_t rng); + +#endif diff --git a/common/global.h b/common/global.h index c59e814..29e1655 100644 --- a/common/global.h +++ b/common/global.h @@ -40,7 +40,7 @@ static inline void fatalerror(char error_text[]) fprintf(stderr,"Run-time error...\n"); fprintf(stderr,"%s\n",error_text); fprintf(stderr,"...now exiting to system...\n"); - exit(1); + abort(); } #ifndef max diff --git a/common/odintrin.h b/common/odintrin.h new file mode 100644 index 0000000..c613959 --- /dev/null +++ b/common/odintrin.h @@ -0,0 +1,329 @@ +/*Daala video codec +Copyright (c) 2003-2013 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +/*Some common macros for potential platform-specific optimization.*/ +#if !defined(_odintrin_H) +# define _odintrin_H (1) + +# if defined(_MSC_VER) +# define _USE_MATH_DEFINES +# endif + +# include +# include +# include +# include "global.h" +# include "simd.h" + +# if defined(__GNUC__) && defined(__GNUC_MINOR__) \ + && defined(__GNUC_PATCHLEVEL__) +# define OD_GNUC_PREREQ(maj, min, pat) \ + ((__GNUC__ << 16) + (__GNUC_MINOR__ << 8) + __GNUC_PATCHLEVEL__ >= \ + ((maj) << 16) + ((min) << 8) + pat) +# else +# define OD_GNUC_PREREQ(maj, min, pat) (0) +# endif + +# if OD_GNUC_PREREQ(3, 4, 0) +# define OD_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# else +# define OD_WARN_UNUSED_RESULT +# endif +# if OD_GNUC_PREREQ(3, 4, 0) +# define OD_ARG_NONNULL(x) __attribute__((__nonnull__(x))) +# else +# define OD_ARG_NONNULL(x) +# endif + +# if defined(OD_ENABLE_ASSERTIONS) + +# define OD_M2STR_WRAPPER(_m) #_m +# define OD_M2STR(_m) OD_M2STR_WRAPPER(_m) + +# define OD_FATAL(_str) \ + (fatalerror("Fatal (internal) error in " \ + __FILE__ ", line " OD_M2STR(__LINE__) ": " _str)) + +# define OD_ASSERT(_cond) \ + do { \ + if (!(_cond)) { \ + OD_FATAL("assertion failed: " # _cond); \ + } \ + } \ + while (0) + +# define OD_ASSERT2(_cond, _message) \ + do { \ + if (!(_cond)) { \ + OD_FATAL("assertion failed: " # _cond "\n" _message); \ + } \ + } \ + while (0) + +# define OD_ALWAYS_TRUE(_cond) OD_ASSERT(_cond) + +# else +# define OD_ASSERT(_cond) +# define 
OD_ASSERT2(_cond, _message) +# define OD_ALWAYS_TRUE(_cond) ((void)(_cond)) +# endif + +# if !defined(M_PI) +# define M_PI (3.1415926535897932384626433832795) +# endif + +# if !defined(M_SQRT2) +# define M_SQRT2 (1.41421356237309504880168872420970) +# endif + +# if !defined(M_SQRT1_2) +# define M_SQRT1_2 (0.70710678118654752440084436210485) +# endif + +# if !defined(M_LOG2E) +# define M_LOG2E (1.4426950408889634073599246810019) +# endif + +/*Some specific platforms may have optimized intrinsic or inline assembly + versions of these functions which can substantially improve performance. + We define macros for them to allow easy incorporation of these non-ANSI + features.*/ + +/*Note that we do not provide a macro for abs(), because it is provided as a + library function, which we assume is translated into an intrinsic to avoid + the function call overhead and then implemented in the smartest way for the + target platform. + With modern gcc (4.x), this is true: it uses cmov instructions if the + architecture supports it and branchless bit-twiddling if it does not (the + speed difference between the two approaches is not measurable). + Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150) + by Sun Microsystems, despite prior art dating back to at least 1996: + http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT + On gcc 3.x, however, our assumption is not true, as abs() is translated to a + conditional jump, which is horrible on deeply pipelined architectures (e.g., + all consumer architectures for the past decade or more). + Also be warned that -C*abs(x) where C is a constant is mis-optimized as + abs(C*x) on every gcc release before 4.2.3. 
+ See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */ + +/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if + given an appropriate architecture, but the branchless bit-twiddling versions + are just as fast, and do not require any special target architecture. + Earlier gcc versions (3.x) compiled both versions to the same assembly + instructions, because of the way they represented ((b) > (a)) internally.*/ +/*#define OD_MAXI(a, b) ((a) < (b) ? (b) : (a))*/ +# define OD_MAXI(a, b) ((a) ^ (((a) ^ (b)) & -((b) > (a)))) +/*#define OD_MINI(a, b) ((a) > (b) ? (b) : (a))*/ +# define OD_MINI(a, b) ((a) ^ (((b) ^ (a)) & -((b) < (a)))) +/*This has a chance of compiling branchless, and is just as fast as the + bit-twiddling method, which is slightly less portable, since it relies on a + sign-extended rightshift, which is not guaranteed by ANSI (but present on + every relevant platform).*/ +# define OD_SIGNI(a) (((a) > 0) - ((a) < 0)) +/*Slightly more portable than relying on a sign-extended right-shift (which is + not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both) + compile it into the right-shift anyway.*/ +# define OD_SIGNMASK(a) (-((a) < 0)) +/*Unlike copysign(), simply inverts the sign of a if b is negative.*/ +# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b)) +# define OD_COPYSIGNI(a, b) OD_FLIPSIGNI(abs(a), b) +/*Clamps an integer into the given range. + If a > c, then the lower bound a is respected over the upper bound c (this + behavior is required to meet our documented API behavior). + a: The lower bound. + b: The value to clamp. + c: The upper bound.*/ +# define OD_CLAMPI(a, b, c) (OD_MAXI(a, OD_MINI(b, c))) +/*Clamps a signed integer between 0 and 255, returning an unsigned char. 
+ This assumes a char is 8 bits.*/ +# define OD_CLAMP255(x) \ + ((unsigned char)((((x) < 0) - 1) & ((x) | -((x) > 255)))) +/*Divides a signed integer by a positive value with exact rounding.*/ +# define OD_DIV_ROUND(x, y) (((x) + OD_FLIPSIGNI((y) >> 1, x))/(y)) +# define OD_DIV_R0(x, y) (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1, (x)))/(y)) +# define OD_DIV_RE(x, y) \ + (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1 + ((x)/(y) & 1), (x)))/(y)) +/*Divides an integer by a power of two, truncating towards 0. + dividend: The integer to divide. + shift: The non-negative power of two to divide by. + rmask: (1 << shift) - 1*/ +# define OD_DIV_POW2(dividend, shift, rmask) \ + (((dividend) + (OD_SIGNMASK(dividend) & (rmask))) >> (shift)) +/*Divides x by 65536, truncating towards 0.*/ +# define OD_DIV2_16(x) OD_DIV_POW2(x, 16, 0xFFFF) +/*Divides x by 2, truncating towards 0.*/ +# define OD_DIV2(x) OD_DIV_POW2(x, 1, 0x1) +/*Divides x by 8, truncating towards 0.*/ +# define OD_DIV8(x) OD_DIV_POW2(x, 3, 0x7) +/*Divides x by 16, truncating towards 0.*/ +# define OD_DIV16(x) OD_DIV_POW2(x, 4, 0xF) +/*Right shifts dividend by shift, adding rval, and subtracting one for + negative dividends first. + When rval is (1 << (shift - 1)), this is equivalent to division with rounding + ties away from zero.*/ +# define OD_DIV_ROUND_POW2(dividend, shift, rval) \ + (((dividend) + OD_SIGNMASK(dividend) + (rval)) >> (shift)) +/*Divides x by 2, rounding towards even numbers.*/ +# define OD_DIV2_RE(x) ((x) + ((x) >> 1 & 1) >> 1) +/*Divides x by (1 << (shift)), rounding towards even numbers.*/ +# define OD_DIV_POW2_RE(x, shift) \ + ((x) + (((1 << (shift)) + ((x) >> (shift) & 1) - 1) >> 1) >> (shift)) +/*Count leading zeros. + This macro should only be used for implementing od_ilog(), if it is defined. 
+ All other code should use OD_ILOG() instead.*/ +# if defined(_MSC_VER) +# include +# if !defined(snprintf) +# define snprintf _snprintf +# endif +/*In _DEBUG mode this is not an intrinsic by default.*/ +# pragma intrinsic(_BitScanReverse) + +static __inline int od_bsr(unsigned long x) { + unsigned long ret; + _BitScanReverse(&ret, x); + return (int)ret; +} +# define OD_CLZ0 (1) +# define OD_CLZ(x) (-od_bsr(x)) +# elif defined(ENABLE_TI_DSPLIB) +# include "dsplib.h" +# define OD_CLZ0 (31) +# define OD_CLZ(x) (_lnorm(x)) +# elif OD_GNUC_PREREQ(3, 4, 0) +# if INT_MAX >= 2147483647 +# define OD_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT) +# define OD_CLZ(x) (__builtin_clz(x)) +# elif LONG_MAX >= 2147483647L +# define OD_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT) +# define OD_CLZ(x) (__builtin_clzl(x)) +# endif +# endif +# if defined(OD_CLZ) +# define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x)) +/*Note that __builtin_clz is not defined when x == 0, according to the gcc + documentation (and that of the x86 BSR instruction that implements it), so + we have to special-case it. + We define a special version of the macro to use when x can be zero.*/ +# define OD_ILOG(x) (OD_ILOG_NZ(x) & -!!(x)) +# else +# define OD_ILOG_NZ(x) (1 + log2i(x)) +# define OD_ILOG(x) (1 + log2i(x)) +# endif + +# define OD_LOG2(x) (M_LOG2E*log(x)) + +/*Swaps two integers a and b if a > b.*/ +/*#define OD_SORT2I(a, b) \ + if ((a) > (b)) { \ + int t__; \ + t__ = (a); \ + (a) = (b); \ + (b) = t__; \ + }*/ +/*This branchless version is significantly faster than the above + straightforward implementation on modern processors.*/ +# define OD_SORT2I(a, b) \ + do { \ + int t__; \ + t__ = ((a) ^ (b)) & -((b) < (a)); \ + (a) ^= t__; \ + (b) ^= t__; \ + } \ + while (0) + +/*All of these macros should expect floats as arguments.*/ +/*These two should compile as a single SSE instruction.*/ +# define OD_MINF(a, b) ((a) < (b) ? (a) : (b)) +# define OD_MAXF(a, b) ((a) > (b) ? 
(a) : (b)) +# define OD_CLAMPF(a, b, c) (OD_MAXF(a, OD_MINF(b, c))) +# if defined(__GNUC__) +# define OD_FABSF(f) (fabsf(f)) +# define OD_SQRTF(f) (sqrtf(f)) +# define OD_POWF(b, e) (powf(b, e)) +# define OD_LOGF(f) (logf(f)) +# define OD_IFLOORF(f) (floorf(f)) +# define OD_ICEILF(f) (ceilf(f)) +# else +# define OD_FABSF(f) ((float)fabs(f)) +# define OD_SQRTF(f) ((float)sqrt(f)) +# define OD_POWF(b, e) ((float)pow(b, e)) +# define OD_LOGF(f) ((float)log(f)) +# define OD_IFLOORF(f) ((int)floor(f)) +# define OD_ICEILF(f) ((int)ceil(f)) +# endif + +/** Copy n elements of memory from src to dst. The 0* term provides + compile-time type checking */ +#if !defined(OVERRIDE_OD_COPY) +# define OD_COPY(dst, src, n) \ + (memcpy((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)))) +#endif + +/** Copy n elements of memory from src to dst, allowing overlapping regions. + The 0* term provides compile-time type checking */ +#if !defined(OVERRIDE_OD_MOVE) +# define OD_MOVE(dst, src, n) \ + (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) )) +#endif + +/** Set n elements of dst to zero */ +#if !defined(OVERRIDE_OD_CLEAR) +# define OD_CLEAR(dst, n) (memset((dst), 0, sizeof(*(dst))*(n))) +#endif + +/** Linkage will break without this if using a C++ compiler, and will issue + * warnings without this for a C compiler*/ +#if defined(__cplusplus) +# define OD_EXTERN extern +#else +# define OD_EXTERN +#endif + +/*Some assembly constructs require aligned operands. + The following macros are _only_ intended for structure member declarations. + Although they will sometimes work on stack variables, gcc will often silently + ignore them. 
+ A separate set of macros could be made for manual stack alignment, but we + don't actually require it anywhere.*/ +# if defined(OD_X86ASM)||defined(OD_ARMASM) +# if defined(__GNUC__) +# define OD_ALIGN8(expr) expr __attribute__((aligned(8))) +# define OD_ALIGN16(expr) expr __attribute__((aligned(16))) +# elif defined(_MSC_VER) +# define OD_ALIGN8(expr) __declspec (align(8)) expr +# define OD_ALIGN16(expr) __declspec (align(16)) expr +# else +# error "Alignment macros required for this platform." +# endif +# endif + +# if !defined(OD_ALIGN8) +# define OD_ALIGN8(expr) expr +# endif +# if !defined(OD_ALIGN16) +# define OD_ALIGN16(expr) expr +# endif + +#endif diff --git a/dec/decode_block.c b/dec/decode_block.c index c658093..a88387b 100644 --- a/dec/decode_block.c +++ b/dec/decode_block.c @@ -469,7 +469,7 @@ void decode_block(decoder_info_t *decoder_info,int size,int ypos,int xpos){ } -int decode_super_mode(decoder_info_t *decoder_info, int size, int decode_rectangular_size){ +int decode_super_mode(decoder_info_t *decoder_info, int size, int decode_this_size){ stream_t *stream = decoder_info->stream; block_context_t *block_context = decoder_info->block_context; @@ -484,13 +484,13 @@ int decode_super_mode(decoder_info_t *decoder_info, int size, int decode_rectang if (frame_type==I_FRAME){ decoder_info->mode = MODE_INTRA; - if (size > MIN_BLOCK_SIZE) + if (size > MIN_BLOCK_SIZE && decode_this_size) split_flag = getbits(stream, 1); else - split_flag = 0; + split_flag = !decode_this_size; return split_flag; } - if (decode_rectangular_size){ + if (!decode_this_size){ split_flag = !getbits(stream,1); return split_flag; } @@ -640,7 +640,7 @@ void process_block_dec(decoder_info_t *decoder_info,int size,int yposY,int xposY int decode_this_size = (yposY + size <= height) && (xposY + size <= width); int decode_rectangular_size = !decode_this_size && frame_type != I_FRAME; - int bit_start = stream->bitcnt; + int bit_start = od_ec_dec_tell(&stream->ec); int mode = MODE_SKIP; @@ 
-648,7 +648,7 @@ void process_block_dec(decoder_info_t *decoder_info,int size,int yposY,int xposY find_block_contexts(yposY, xposY, height, width, size, decoder_info->deblock_data, &block_context, decoder_info->use_block_contexts); decoder_info->block_context = &block_context; - split_flag = decode_super_mode(decoder_info,size,decode_rectangular_size); + split_flag = decode_super_mode(decoder_info,size,decode_this_size); mode = decoder_info->mode; /* Read delta_qp and set block-level qp */ @@ -658,7 +658,8 @@ void process_block_dec(decoder_info_t *decoder_info,int size,int yposY,int xposY decoder_info->frame_info.qpb = decoder_info->frame_info.qp + delta_qp; } - decoder_info->bit_count.super_mode[decoder_info->bit_count.stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.super_mode[decoder_info->bit_count.stat_frame_type] + += (od_ec_dec_tell(&stream->ec) - bit_start); if (split_flag){ int new_size = size/2; diff --git a/dec/decode_frame.c b/dec/decode_frame.c index dcff858..9e0ee67 100644 --- a/dec/decode_frame.c +++ b/dec/decode_frame.c @@ -52,7 +52,7 @@ void decode_frame(decoder_info_t *decoder_info) stream_t *stream = decoder_info->stream; memset(decoder_info->deblock_data, 0, ((height/MIN_PB_SIZE) * (width/MIN_PB_SIZE) * sizeof(deblock_data_t)) ); - int bit_start = stream->bitcnt; + int bit_start = od_ec_dec_tell(&stream->ec); decoder_info->frame_info.frame_type = getbits(stream,1); int qp = getbits(stream,8); @@ -92,7 +92,8 @@ void decode_frame(decoder_info_t *decoder_info) decoder_info->bit_count.stat_frame_type = decoder_info->frame_info.frame_type; if (decoder_info->frame_info.frame_type != I_FRAME && decoder_info->num_reorder_pics > 0 && decoder_info->frame_info.display_frame_num%(decoder_info->num_reorder_pics+1)) decoder_info->bit_count.stat_frame_type = B_FRAME; - decoder_info->bit_count.frame_header[decoder_info->bit_count.stat_frame_type] += (stream->bitcnt - bit_start); + 
decoder_info->bit_count.frame_header[decoder_info->bit_count.stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); decoder_info->bit_count.frame_type[decoder_info->bit_count.stat_frame_type] += 1; decoder_info->frame_info.qp = qp; decoder_info->frame_info.qpb = qp; diff --git a/dec/entdec.c b/dec/entdec.c new file mode 100644 index 0000000..adeab64 --- /dev/null +++ b/dec/entdec.c @@ -0,0 +1,566 @@ +/*Daala video codec +Copyright (c) 2001-2013 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "entdec.h" + +/*A range decoder. 
+ This is an entropy decoder based upon \cite{Mar79}, which is itself a + rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. + It is very similar to arithmetic encoding, except that encoding is done with + digits in any base, instead of with bits, and so it is faster when using + larger bases (i.e.: a byte). + The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ + is the base, longer than the theoretical optimum, but to my knowledge there + is no published justification for this claim. + This only seems true when using near-infinite precision arithmetic so that + the process is carried out with no rounding errors. + + An excellent description of implementation details is available at + http://www.arturocampos.com/ac_range.html + A recent work \cite{MNW98} which proposes several changes to arithmetic + encoding for efficiency actually re-discovers many of the principles + behind range encoding, and presents a good theoretical analysis of them. + + End of stream is handled by writing out the smallest number of bits that + ensures that the stream will be correctly decoded regardless of the value of + any subsequent bits. + od_ec_dec_tell() can be used to determine how many bits were needed to decode + all the symbols thus far; other data can be packed in the remaining bits of + the input buffer. + @PHDTHESIS{Pas76, + author="Richard Clark Pasco", + title="Source coding algorithms for fast data compression", + school="Dept. 
of Electrical Engineering, Stanford University", + address="Stanford, CA", + month=May, + year=1976, + URL="http://www.richpasco.org/scaffdc.pdf" + } + @INPROCEEDINGS{Mar79, + author="Martin, G.N.N.", + title="Range encoding: an algorithm for removing redundancy from a digitised + message", + booktitle="Video & Data Recording Conference", + year=1979, + address="Southampton", + month=Jul, + URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz" + } + @ARTICLE{MNW98, + author="Alistair Moffat and Radford Neal and Ian H. Witten", + title="Arithmetic Coding Revisited", + journal="{ACM} Transactions on Information Systems", + year=1998, + volume=16, + number=3, + pages="256--294", + month=Jul, + URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf" + }*/ + +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). + Even relatively modest values like 100 would work fine.*/ +#define OD_EC_LOTS_OF_BITS (0x4000) + +static void od_ec_dec_refill(od_ec_dec *dec) { + int s; + od_ec_window dif; + int16_t cnt; + const unsigned char *bptr; + const unsigned char *end; + dif = dec->dif; + cnt = dec->cnt; + bptr = dec->bptr; + end = dec->end; + s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15); + for (; s >= 0 && bptr < end; s -= 8, bptr++) { + OD_ASSERT(s <= OD_EC_WINDOW_SIZE - 8); + dif |= (od_ec_window)bptr[0] << s; + cnt += 8; + } + if (bptr >= end) { + dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt; + cnt = OD_EC_LOTS_OF_BITS; + } + dec->dif = dif; + dec->cnt = cnt; + dec->bptr = bptr; +} + +/*Takes updated dif and range values, renormalizes them so that + 32768 <= rng < 65536 (reading more bytes from the stream into dif if + necessary), and stores them back in the decoder context. + dif: The new value of dif. + rng: The new value of the range. + ret: The value to return. + Return: ret. 
+ This allows the compiler to jump to this function via a tail-call.*/ +static int od_ec_dec_normalize(od_ec_dec *dec, + od_ec_window dif, unsigned rng, int ret) { + int d; + OD_ASSERT(rng <= 65535U); + d = 16 - OD_ILOG_NZ(rng); + dec->cnt -= d; /*The d bits shifted out of dif below are no longer buffered.*/ + dec->dif = dif << d; + dec->rng = rng << d; + if (dec->cnt < 0) od_ec_dec_refill(dec); + return ret; +} + +/*Initializes the decoder. + buf: The input buffer to use. + storage: The size of the input buffer, in bytes.*/ +void od_ec_dec_init(od_ec_dec *dec, + const unsigned char *buf, uint32_t storage) { + dec->buf = buf; + dec->eptr = buf + storage; /*Raw bits are read backwards from the end of the buffer (see od_ec_dec_bits()).*/ + dec->end_window = 0; + dec->nend_bits = 0; + dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8); + dec->end = buf + storage; + dec->bptr = buf; + dec->dif = 0; + dec->rng = 0x8000; + dec->cnt = -15; + dec->error = 0; + od_ec_dec_refill(dec); /*Preload dif with the first bytes of the buffer.*/ +} + +/*Decode a bit that has an fz/ft probability of being a zero. + fz: The probability that the bit is zero, scaled by ft. + ft: The total probability. + This must be at least 16384 and no more than 32768. + Return: The value decoded (0 or 1).*/ +int od_ec_decode_bool(od_ec_dec *dec, unsigned fz, unsigned ft) { + od_ec_window dif; + od_ec_window vw; + unsigned r; + int s; + unsigned v; + int ret; + OD_ASSERT(0 < fz); + OD_ASSERT(fz < ft); + OD_ASSERT(16384 <= ft); + OD_ASSERT(ft <= 32768U); + dif = dec->dif; + r = dec->rng; + OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r); + OD_ASSERT(ft <= r); + s = r - ft >= ft; /*s is 1 exactly when ft can be doubled without exceeding r.*/ + ft <<= s; + fz <<= s; + OD_ASSERT(r - ft < ft); +#if OD_EC_REDUCED_OVERHEAD + { + unsigned d; + unsigned e; + d = r - ft; + e = OD_SUBSATU(2*d, ft); + v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d); + } +#else + v = fz + OD_MINI(fz, r - ft); +#endif + vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16); + ret = dif >= vw; /*A 1 is decoded when dif is at or above the split point v (aligned to the top 16 bits of the window).*/ + if (ret) dif -= vw; + r = ret ? r - v : v; + return od_ec_dec_normalize(dec, dif, r, ret); +} + +/*Equivalent to od_ec_decode_bool() with ft == 32768. 
+ fz: The probability that the bit is zero, scaled by 32768. + Return: The value decoded (0 or 1).*/ +int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz) { + od_ec_window dif; + od_ec_window vw; + unsigned r; + unsigned v; + int ret; + OD_ASSERT(0 < fz); + OD_ASSERT(fz < 32768U); + dif = dec->dif; + r = dec->rng; + OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r); + OD_ASSERT(32768U <= r); +#if OD_EC_REDUCED_OVERHEAD + { + unsigned d; + unsigned e; + d = r - 32768U; + e = OD_SUBSATU(2*d, 32768U); + v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d); + } +#else + v = fz + OD_MINI(fz, r - 32768U); +#endif + vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16); + ret = dif >= vw; /*A 1 is decoded when dif is at or above the split point v (aligned to the top 16 bits of the window).*/ + if (ret) dif -= vw; + r = ret ? r - v : v; /*The new range is the size of whichever side of the split was chosen.*/ + return od_ec_dec_normalize(dec, dif, r, ret); +} + +/*Decodes a symbol given a cumulative distribution function (CDF) table. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and cdf[nsyms - 1] + must be at least 16384, and no more than 32768. + nsyms: The number of symbols in the alphabet. + This should be at most 16. 
+ Return: The decoded symbol s.*/ +int od_ec_decode_cdf(od_ec_dec *dec, const uint16_t *cdf, int nsyms) { + od_ec_window dif; + unsigned r; + unsigned c; + unsigned d; +#if OD_EC_REDUCED_OVERHEAD + unsigned e; +#endif + int s; + unsigned u; + unsigned v; + unsigned q; + unsigned fl; + unsigned fh; + unsigned ft; + int ret; + dif = dec->dif; + r = dec->rng; + OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r); + OD_ASSERT(nsyms > 0); + ft = cdf[nsyms - 1]; /*The last CDF entry is the total frequency.*/ + OD_ASSERT(16384 <= ft); + OD_ASSERT(ft <= 32768U); + OD_ASSERT(ft <= r); + s = r - ft >= ft; /*s is 1 exactly when ft can be doubled without exceeding r.*/ + ft <<= s; + d = r - ft; + OD_ASSERT(d < ft); + c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); + q = OD_MAXI((int)(c >> 1), (int)(c - d)); +#if OD_EC_REDUCED_OVERHEAD + e = OD_SUBSATU(2*d, ft); + /*The correctness of this inverse partition function is not obvious, but it + was checked exhaustively for all possible values of r, ft, and c. + TODO: It should be possible to optimize this better than the compiler, + given that we do not care about the accuracy of negative results (as we + will not use them). + It would also be nice to get rid of the 32-bit dividend, as it requires a + 32x32->64 bit multiply to invert.*/ + q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3)); +#endif + q >>= s; /*Undo the scaling by s so that q is comparable with the raw cdf[] entries.*/ + OD_ASSERT(q < ft >> s); + fl = 0; + ret = 0; + for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh; /*Find the first CDF entry above q; fl keeps the entry before it (0 for symbol 0).*/ + OD_ASSERT(fh <= ft >> s); + fl <<= s; + fh <<= s; +#if OD_EC_REDUCED_OVERHEAD + u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d); + v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d); +#else + u = fl + OD_MINI(fl, d); + v = fh + OD_MINI(fh, d); +#endif + r = v - u; + dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16); + return od_ec_dec_normalize(dec, dif, r, ret); +} + +/*Decodes a symbol given a cumulative distribution function (CDF) table. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). 
+ The values must be monotonically non-decreasing, and cdf[nsyms - 1] + must be 32768. + nsyms: The number of symbols in the alphabet. + This should be at most 16. + Return: The decoded symbol s.*/ +int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) { + od_ec_window dif; + unsigned r; + unsigned c; + unsigned d; +#if OD_EC_REDUCED_OVERHEAD + unsigned e; +#endif + unsigned u; + unsigned v; + unsigned q; + unsigned fl; + unsigned fh; + int ret; + (void)nsyms; /*nsyms is only read by the assertions below.*/ + dif = dec->dif; + r = dec->rng; + OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r); + OD_ASSERT(nsyms > 0); + OD_ASSERT(cdf[nsyms - 1] == 32768U); + OD_ASSERT(32768U <= r); + d = r - 32768U; + OD_ASSERT(d < 32768U); + c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); + q = OD_MAXI((int)(c >> 1), (int)(c - d)); +#if OD_EC_REDUCED_OVERHEAD + e = OD_SUBSATU(2*d, 32768U); + /*TODO: See TODO above.*/ + q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3)); +#endif + OD_ASSERT(q < 32768U); + fl = 0; + ret = 0; + for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh; /*Find the first CDF entry above q; fl keeps the entry before it (0 for symbol 0).*/ + OD_ASSERT(fh <= 32768U); +#if OD_EC_REDUCED_OVERHEAD + u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d); + v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d); +#else + u = fl + OD_MINI(fl, d); + v = fh + OD_MINI(fh, d); +#endif + r = v - u; + dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16); + return od_ec_dec_normalize(dec, dif, r, ret); +} + +/*Decodes a symbol given a cumulative distribution function (CDF) table. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and cdf[nsyms - 1] + must be at least 2, and no more than 32768. + nsyms: The number of symbols in the alphabet. + This should be at most 16. 
+ Return: The decoded symbol s.*/ +int od_ec_decode_cdf_unscaled(od_ec_dec *dec, + const uint16_t *cdf, int nsyms) { + od_ec_window dif; + unsigned r; + unsigned c; + unsigned d; +#if OD_EC_REDUCED_OVERHEAD + unsigned e; +#endif + int s; + unsigned u; + unsigned v; + unsigned q; + unsigned fl; + unsigned fh; + unsigned ft; + int ret; + dif = dec->dif; + r = dec->rng; + OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r); + OD_ASSERT(nsyms > 0); + ft = cdf[nsyms - 1]; /*The last CDF entry is the total frequency.*/ + OD_ASSERT(2 <= ft); + OD_ASSERT(ft <= 32768U); + s = 15 - OD_ILOG_NZ(ft - 1); + ft <<= s; + OD_ASSERT(ft <= r); + if (r - ft >= ft) { /*Double ft once more if twice its value still does not exceed r.*/ + ft <<= 1; + s++; + } + d = r - ft; + OD_ASSERT(d < ft); + c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); + q = OD_MAXI((int)(c >> 1), (int)(c - d)); +#if OD_EC_REDUCED_OVERHEAD + e = OD_SUBSATU(2*d, ft); + /*TODO: See TODO above.*/ + q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3)); +#endif + q >>= s; /*Undo the scaling by s so that q is comparable with the raw cdf[] entries.*/ + OD_ASSERT(q < ft >> s); + fl = 0; + ret = 0; + for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh; /*Find the first CDF entry above q; fl keeps the entry before it (0 for symbol 0).*/ + OD_ASSERT(fh <= ft >> s); + fl <<= s; + fh <<= s; +#if OD_EC_REDUCED_OVERHEAD + u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d); + v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d); +#else + u = fl + OD_MINI(fl, d); + v = fh + OD_MINI(fh, d); +#endif + r = v - u; + dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16); + return od_ec_dec_normalize(dec, dif, r, ret); +} + +/*Decodes a symbol given a cumulative distribution function (CDF) table. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and cdf[nsyms - 1] + must be exactly 1 << ftb. + nsyms: The number of symbols in the alphabet. + This should be at most 16. + ftb: The number of bits of precision in the cumulative distribution. + This must be no more than 15. 
+ Return: The decoded symbol s.*/ +int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec, + const uint16_t *cdf, int nsyms, unsigned ftb) { + od_ec_window dif; + unsigned r; + unsigned c; + unsigned d; +#if OD_EC_REDUCED_OVERHEAD + unsigned e; +#endif + int s; + unsigned u; + unsigned v; + unsigned q; + unsigned fl; + unsigned fh; + int ret; + (void)nsyms; /*nsyms is only read by the assertion below.*/ + dif = dec->dif; + r = dec->rng; + OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r); + OD_ASSERT(ftb <= 15); + OD_ASSERT(cdf[nsyms - 1] == 1U << ftb); + s = 15 - ftb; /*Shift that scales the 1 << ftb total up to 32768.*/ + OD_ASSERT(32768U <= r); + d = r - 32768U; + OD_ASSERT(d < 32768U); + c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); + q = OD_MAXI((int)(c >> 1), (int)(c - d)); +#if OD_EC_REDUCED_OVERHEAD + e = OD_SUBSATU(2*d, 32768U); + /*TODO: See TODO above.*/ + q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3)); +#endif + q >>= s; /*Undo the scaling by s so that q is comparable with the raw cdf[] entries.*/ + OD_ASSERT(q < 1U << ftb); + fl = 0; + ret = 0; + for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh; /*Find the first CDF entry above q; fl keeps the entry before it (0 for symbol 0).*/ + OD_ASSERT(fh <= 1U << ftb); + fl <<= s; + fh <<= s; +#if OD_EC_REDUCED_OVERHEAD + u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d); + v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d); +#else + u = fl + OD_MINI(fl, d); + v = fh + OD_MINI(fh, d); +#endif + r = v - u; + dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16); + return od_ec_dec_normalize(dec, dif, r, ret); +} + +/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream. + The integer must have been encoded with od_ec_enc_uint(). + ft: The number of integers that can be decoded (one more than the max). + This must be at least 2, and no more than 2**29. 
+ Return: The decoded integer.*/ +uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) { + OD_ASSERT(ft >= 2); + OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS)); + if (ft > 1U << OD_EC_UINT_BITS) { + uint32_t t; + int ft1; + int ftb; + ft--; /*From here on ft is the largest valid value rather than the count.*/ + ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS; + ft1 = (int)(ft >> ftb) + 1; + t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1); + t = t << ftb | od_ec_dec_bits(dec, ftb); /*The high part comes from the range coder, the low ftb bits from the raw bits.*/ + if (t <= ft) return t; + dec->error = 1; /*The stream decoded to a value above the maximum; flag it and return the maximum instead.*/ + return ft; + } + return od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft), (int)ft); +} + +/*Extracts a sequence of raw bits from the stream. + The bits must have been encoded with od_ec_enc_bits(). + ftb: The number of bits to extract. + This must be between 0 and 25, inclusive. + Return: The decoded bits.*/ +uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb) { + od_ec_window window; + int available; + uint32_t ret; + OD_ASSERT(ftb <= 25); + window = dec->end_window; + available = dec->nend_bits; + if ((unsigned)available < ftb) { + const unsigned char *buf; + const unsigned char *eptr; + buf = dec->buf; + eptr = dec->eptr; + OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8); + do { + if (eptr <= buf) { /*Out of input: pretend lots of bits are available, and add the difference to tell_offs so od_ec_dec_tell() is unaffected.*/ + dec->tell_offs += OD_EC_LOTS_OF_BITS - available; + available = OD_EC_LOTS_OF_BITS; + break; + } + window |= (od_ec_window)*--eptr << available; /*Bytes are taken backwards from the end of the buffer and stacked above the bits already held.*/ + available += 8; + } + while (available <= OD_EC_WINDOW_SIZE - 8); + dec->eptr = eptr; + } + ret = (uint32_t)window & (((uint32_t)1 << ftb) - 1); + window >>= ftb; + available -= ftb; + dec->end_window = window; + dec->nend_bits = available; + return ret; +} + +/*Returns the number of bits "used" by the decoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Return: The number of bits. 
+ This will always be slightly larger than the exact value (i.e., all + rounding error is in the positive direction).*/ +int od_ec_dec_tell(od_ec_dec *dec) { + return ((dec->end - dec->eptr) + (dec->bptr - dec->buf))*8 + - dec->cnt - dec->nend_bits + dec->tell_offs; /*Bytes taken from both ends of the buffer, in bits, less the cnt and nend_bits counters, plus the end-of-stream correction.*/ +} + +/*Returns the number of bits "used" by the decoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Return: The number of bits scaled by 2**OD_BITRES. + This will always be slightly larger than the exact value (i.e., all + rounding error is in the positive direction).*/ +uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) { + return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng); +} diff --git a/dec/entdec.h b/dec/entdec.h new file mode 100644 index 0000000..bc57eab --- /dev/null +++ b/dec/entdec.h @@ -0,0 +1,91 @@ +/*Daala video codec +Copyright (c) 2001-2013 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#if !defined(_entdec_H) +# define _entdec_H (1) +# include +# include "entcode.h" + +typedef struct od_ec_dec od_ec_dec; + +/*The entropy decoder context.*/ +struct od_ec_dec { + /*The start of the current input buffer.*/ + const unsigned char *buf; + /*The read pointer for the raw bits (moves backwards from the end of buf).*/ + const unsigned char *eptr; + /*Raw bits that have been read from the end of the buffer but not yet used.*/ + od_ec_window end_window; + /*Number of valid bits in end_window.*/ + int nend_bits; + /*An offset used to keep track of tell after reaching the end of the stream. 
+ This is constant throughout most of the decoding process, but becomes + important once we hit the end of the buffer and stop incrementing pointers + (and instead pretend cnt/nend_bits have lots of bits).*/ + int32_t tell_offs; + /*The end of the current input buffer.*/ + const unsigned char *end; + /*The read pointer for the entropy-coded bits (moves forwards from buf).*/ + const unsigned char *bptr; + /*The difference between the coded value and the low end of the current + range.*/ + od_ec_window dif; + /*The number of values in the current range.*/ + uint16_t rng; + /*The number of bits of data in the current value.*/ + int16_t cnt; + /*Nonzero if an error occurred.*/ + int error; +}; + +/*See entdec.c for further documentation.*/ + +void od_ec_dec_init(od_ec_dec *dec, + const unsigned char *buf, uint32_t storage) + OD_ARG_NONNULL(1) OD_ARG_NONNULL(2); + +OD_WARN_UNUSED_RESULT int od_ec_decode_bool(od_ec_dec *dec, unsigned fz, + unsigned ft) OD_ARG_NONNULL(1); +OD_WARN_UNUSED_RESULT int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz) + OD_ARG_NONNULL(1); +OD_WARN_UNUSED_RESULT int od_ec_decode_cdf(od_ec_dec *dec, + const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2); +OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_q15(od_ec_dec *dec, + const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2); +OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled(od_ec_dec *dec, + const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2); +OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec, + const uint16_t *cdf, int nsyms, unsigned _ftb) + OD_ARG_NONNULL(1) OD_ARG_NONNULL(2); + +OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec, + uint32_t ft) OD_ARG_NONNULL(1); + +OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits(od_ec_dec *dec, + unsigned ftb) OD_ARG_NONNULL(1); + +OD_WARN_UNUSED_RESULT int od_ec_dec_tell(od_ec_dec *dec) OD_ARG_NONNULL(1); +uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) OD_ARG_NONNULL(1); + +#endif diff --git 
a/dec/getbits.c b/dec/getbits.c index 34bd0c3..104b3f0 100644 --- a/dec/getbits.c +++ b/dec/getbits.c @@ -24,123 +24,88 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include #include "global.h" #include "getbits.h" - -/* to mask the n least significant bits of an integer */ -static const unsigned int msk[33] = -{ - 0x00000000,0x00000001,0x00000003,0x00000007, - 0x0000000f,0x0000001f,0x0000003f,0x0000007f, - 0x000000ff,0x000001ff,0x000003ff,0x000007ff, - 0x00000fff,0x00001fff,0x00003fff,0x00007fff, - 0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff, - 0x000fffff,0x001fffff,0x003fffff,0x007fffff, - 0x00ffffff,0x01ffffff,0x03ffffff,0x07ffffff, - 0x0fffffff,0x1fffffff,0x3fffffff,0x7fffffff, - 0xffffffff -}; - +/*Reads the next frame from infile into a heap buffer owned by str and starts + the entropy decoder on it. + The frame is stored as a 4-byte big-endian byte count followed by that many + bytes of payload. + Return: 0 on success, or nonzero if a read or the allocation fails.*/ int initbits_dec(FILE *infile, stream_t *str) { - fpos_t fpos[1]; - long pos1,pos2; - - str->incnt = 0; - str->rdptr = str->rdbfr + 2048; - str->bitcnt = 0; + uint8_t frame_bytes_buf[4]; + uint32_t length; + int ret; str->infile = infile; - - fgetpos(str->infile,fpos); - pos1 = ftell(str->infile); - fseek(str->infile,0,SEEK_END); - pos2 = ftell(str->infile); - fsetpos(str->infile,fpos); - str->length = pos2 - pos1; - return 0; -} - -int fillbfr(stream_t *str) -{ - //int l; - - while (str->incnt <= 24 && (str->rdptr < str->rdbfr + 2048)) - { - str->inbfr = (str->inbfr << 8) | *str->rdptr++; - str->incnt += 8; - } - - if (str->rdptr >= str->rdbfr + 2048) - { - //l = (int)fread(str->rdbfr,sizeof(unsigned char),2048,str->infile); - fread(str->rdbfr,sizeof(unsigned char),2048,str->infile); - str->rdptr = str->rdbfr; - - while (str->incnt <= 24 && (str->rdptr < str->rdbfr + 2048)) - { - str->inbfr = (str->inbfr << 8) | *str->rdptr++; - str->incnt += 8; - } - } - - return 0; -} - -unsigned int getbits(stream_t *str, int n) -{ - - if (str->incnt < n) + length = 0; + ret = fread(frame_bytes_buf, sizeof(frame_bytes_buf), 1, infile) != 1; + if (!ret) { - fillbfr(str); 
- if (str->incnt < n) + unsigned char *buf; + length = (uint32_t)frame_bytes_buf[0] << 24 | (uint32_t)frame_bytes_buf[1] << 16 + | (uint32_t)frame_bytes_buf[2] << 8 | frame_bytes_buf[3]; + buf = realloc(str->buf, sizeof(*buf)*length); + ret = buf == NULL; + if (!ret) { - unsigned int l = str->inbfr; - unsigned int k = *str->rdptr++; - int shift = n-str->incnt; - str->inbfr = (str->inbfr << 8) | k; - str->incnt = str->incnt - n + 8; - str->bitcnt += n; - return (((l << shift) | (k >> (8-shift))) & msk[n]); + /*Record the buffer in str so that the next call hands it back to realloc() + rather than leaking it.*/ + str->buf = buf; + ret = fread(buf, sizeof(*buf), length, str->infile) != length; + if (!ret) + { + od_ec_dec_init(&str->ec, buf, length); + } } } - - str->incnt -= n; - str->bitcnt += n; - return ((str->inbfr >> str->incnt) & msk[n]); + return ret; } -unsigned int getbits1(stream_t *str) -{ - if (str->incnt < 1) - { - fillbfr(str); - } - str->incnt--; - str->bitcnt++; - return ((str->inbfr >> str->incnt) & 1); -} +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). 
+ Even relatively modest values like 100 would work fine.*/ +#define OD_EC_LOTS_OF_BITS (0x4000) unsigned int showbits(stream_t *str, int n) { - if (str->incnt < n) - { - fillbfr(str); - if (str->incnt < n) - { - int shift = n-str->incnt; - return (((str->inbfr << shift) | (str->rdptr[0] >> (8-shift))) & msk[n]); + od_ec_window window; + int available; + uint32_t ret; + OD_ASSERT(n <= 25); /*Peeks at the next n raw bits without consuming them; flushbits() does the consuming.*/ + window = str->ec.end_window; + available = str->ec.nend_bits; + if ((unsigned)available < n) { + const unsigned char *buf; + const unsigned char *eptr; + buf = str->ec.buf; + eptr = str->ec.eptr; + OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8); + do { + if (eptr <= buf) { /*Out of input: pretend lots of bits are available and add the difference to tell_offs.*/ + str->ec.tell_offs += OD_EC_LOTS_OF_BITS - available; + available = OD_EC_LOTS_OF_BITS; + break; + } + window |= (od_ec_window)*--eptr << available; + available += 8; } + while (available <= OD_EC_WINDOW_SIZE - 8); + str->ec.eptr = eptr; + str->ec.end_window = window; /*Only the refill is stored back; the n bits themselves stay in the window.*/ + str->ec.nend_bits = available; } - - return ((str->inbfr >> (str->incnt-n)) & msk[n]); + ret = (uint32_t)window & (((uint32_t)1 << n) - 1); + OD_ASSERT(n > 0); + return bitreverse(ret << (32 - n)); /*The lowest bit of the window ends up as the most significant of the n returned bits.*/ } -int flushbits(stream_t *str, int n) +void flushbits(stream_t *str, int n) { - str->incnt -= n; - str->bitcnt += n; - return 0; + OD_ASSERT(str->ec.nend_bits >= n); /*Only bits already buffered in end_window can be discarded.*/ + str->ec.end_window >>= n; + str->ec.nend_bits -= n; } diff --git a/dec/getbits.h b/dec/getbits.h index 1377b11..18f98ea 100644 --- a/dec/getbits.h +++ b/dec/getbits.h @@ -28,23 +28,38 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define _GETBITS_H_ #include +#include "entdec.h" typedef struct { + od_ec_dec ec; FILE *infile; - unsigned char rdbfr[2051]; - unsigned char *rdptr; - unsigned int inbfr; - int incnt; - int bitcnt; - int length; + unsigned char *buf; /*Frame buffer that initbits_dec() passes to realloc().*/ } stream_t; int initbits_dec(FILE *infile, stream_t *str); -int fillbfr(stream_t *str); + +static inline unsigned bitreverse(unsigned val) /*Reverses the order of the low 32 bits of val by swapping halves, bytes, nibbles, bit pairs and finally single bits.*/ +{ + val = ((val >> 16) & 0x0000FFFFU) | ((val <<16) & 0xFFFF0000U); + val = ((val >> 8) & 0x00FF00FFU) | ((val << 8) & 0xFF00FF00U); + val = ((val >> 4) & 0x0F0F0F0FU) | ((val << 4) & 0xF0F0F0F0U); + val = ((val >> 2) & 0x33333333U) | ((val << 2) & 0xCCCCCCCCU); + return ((val >> 1) & 0x55555555U) | ((val << 1) & 0xAAAAAAAAUL); +} + unsigned int showbits(stream_t *str, int n); -unsigned int getbits1(stream_t *str); -int flushbits(stream_t *str, int n); -unsigned int getbits(stream_t *str, int n); + +static inline unsigned int getbits1(stream_t *str) /*Reads a single raw bit; a one-bit value needs no reversal.*/ +{ + return od_ec_dec_bits(&str->ec, 1); +} + +void flushbits(stream_t *str, int n); + +static inline unsigned int getbits(stream_t *str, int n) +{ + return n > 0 ? 
bitreverse(od_ec_dec_bits(&str->ec, n) << (32 - n)) : 0; +} #endif diff --git a/dec/maindec.c b/dec/maindec.c index 6a5dc0d..6fe39c3 100644 --- a/dec/maindec.c +++ b/dec/maindec.c @@ -126,6 +126,7 @@ int main(int argc, char** argv) int decode_frame_num = 0; int frame_count = 0; int last_frame_output = -1; + int done = 0; int width; int height; int r; @@ -139,13 +140,14 @@ int main(int argc, char** argv) int input_file_size = ftell(infile); fseek(infile, 0, SEEK_SET); + memset(&stream, 0, sizeof(stream)); initbits_dec(infile, &stream); decoder_info.stream = &stream; memset(&decoder_info.bit_count,0,sizeof(bit_count_t)); - int bit_start = stream.bitcnt; + int bit_start = od_ec_dec_tell(&stream.ec); /* Read sequence header */ width = getbits(&stream,16); height = getbits(&stream,16); @@ -176,7 +178,8 @@ int main(int argc, char** argv) decoder_info.use_block_contexts = getbits(&stream,1); decoder_info.bipred = getbits(&stream,1); - decoder_info.bit_count.sequence_header += (stream.bitcnt - bit_start); + decoder_info.bit_count.sequence_header += + (od_ec_dec_tell(&stream.ec) - bit_start); for (r=0;rframe_num = decoder_info.frame_info.display_frame_num; decode_frame(&decoder_info); + done = initbits_dec(infile, &stream); rec_available[rec_buffer_idx]=1; rec_buffer_idx = (last_frame_output+1)%MAX_REORDER_BUFFER; @@ -212,11 +216,12 @@ int main(int argc, char** argv) rec_available[rec_buffer_idx] = 0; } printf("decode_frame_num=%4d display_frame_num=%4d input_file_size=%12d bitcnt=%12d\n", - decode_frame_num,decoder_info.frame_info.display_frame_num,input_file_size,stream.bitcnt); + decode_frame_num,decoder_info.frame_info.display_frame_num,input_file_size,od_ec_dec_tell(&stream.ec)); decode_frame_num++; } frame_count++; } + while (!done); // Output the tail int i,j; for (i=1; i<=MAX_REORDER_BUFFER; ++i) { diff --git a/dec/read_bits.c b/dec/read_bits.c index db18d9a..ad73578 100644 --- a/dec/read_bits.c +++ b/dec/read_bits.c @@ -253,13 +253,12 @@ int 
read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b int16_t *coeff_v = block_info->coeffq_v; zerovec.y = zerovec.x = 0; - bit_start = stream->bitcnt; mode = decoder_info->mode; int coeff_block_type = (mode == MODE_INTRA)<<1; /* Initialize bit counter for statistical purposes */ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); if (mode == MODE_SKIP){ /* Derive skip vector candidates and number of skip vector candidates from neighbour blocks */ @@ -291,7 +290,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b } else skip_idx = 0; - decoder_info->bit_count.skip_idx[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.skip_idx[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); block_info->num_skip_vec = num_skip_vec; block_info->pred_data.skip_idx = skip_idx; @@ -352,7 +352,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b } else skip_idx = 0; - decoder_info->bit_count.skip_idx[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.skip_idx[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); block_info->num_skip_vec = num_skip_vec; block_info->pred_data.skip_idx = skip_idx; @@ -450,7 +451,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b read_mv(stream,&mv_arr[2],&mvp2); read_mv(stream,&mv_arr[3],&mvp2); } - decoder_info->bit_count.mv[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.mv[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); block_info->pred_data.ref_idx0 = ref_idx; block_info->pred_data.ref_idx1 = ref_idx; block_info->pred_data.dir = 0; @@ -554,7 +556,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b block_info->pred_data.dir = 2; int combined_ref = block_info->pred_data.ref_idx0 * decoder_info->frame_info.num_ref + block_info->pred_data.ref_idx1; 
decoder_info->bit_count.bi_ref[stat_frame_type][combined_ref] += 1; - decoder_info->bit_count.mv[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.mv[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else if (mode==MODE_INTRA){ @@ -606,7 +609,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b intra_mode = intra_mode_map_inv[code]; } - decoder_info->bit_count.intra_mode[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.intra_mode[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); decoder_info->bit_count.size_and_intra_mode[stat_frame_type][log2i(size)-3][intra_mode] += 1; block_info->pred_data.intra_mode = intra_mode; @@ -623,7 +627,7 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b int tmp,cbp2; int cbp_table[8] = {1,0,5,2,6,3,7,4}; - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); code = get_vlc(0,stream); if (decoder_info->tb_split_enable && (mode==MODE_INTRA || (mode==MODE_INTER && PBpart==0))){ @@ -636,7 +640,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b tb_split = 0; } block_info->tb_split = tb_split; - decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.cbp[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); if (tb_split == 0){ tmp = 0; @@ -670,25 +675,28 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b block_info->cbp = cbp; if (cbp.y){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff_y,sizeY,coeff_block_type|0); - decoder_info->bit_count.coeff_y[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_y[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else memset(coeff_y,0,sizeY*sizeY*sizeof(int16_t)); if (cbp.u){ - bit_start = stream->bitcnt; + bit_start = 
od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff_u,sizeC,coeff_block_type|1); - decoder_info->bit_count.coeff_u[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_u[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else memset(coeff_u,0,sizeC*sizeC*sizeof(int16_t)); if (cbp.v){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff_v,size/2,coeff_block_type|1); - decoder_info->bit_count.coeff_v[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_v[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else memset(coeff_v,0,sizeC*sizeC*sizeof(int16_t)); @@ -700,7 +708,7 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b /* Loop over 4 TUs */ for (index=0;index<4;index++){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); code = get_vlc(0,stream); int tmp = 0; while (code != cbp_table[tmp] && tmp < 8) tmp++; @@ -711,7 +719,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b cbp.v = ((tmp>>2)&1); /* Updating statistics for CBP */ - decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.cbp[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); decoder_info->bit_count.cbp_stat[stat_frame_type][cbp.y + (cbp.u<<1) + (cbp.v<<2)] += 1; /* Decode coefficients for this TU */ @@ -719,9 +728,10 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b /* Y */ coeff = coeff_y + index*sizeY/2*sizeY/2; if (cbp.y){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff,sizeY/2,coeff_block_type|0); - decoder_info->bit_count.coeff_y[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_y[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else{ memset(coeff,0,sizeY/2*sizeY/2*sizeof(int16_t)); 
@@ -730,9 +740,10 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b /* U */ coeff = coeff_u + index*sizeC/2*sizeC/2; if (cbp.u){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff,sizeC/2,coeff_block_type|1); - decoder_info->bit_count.coeff_u[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_u[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else{ memset(coeff,0,sizeC/2*sizeC/2*sizeof(int16_t)); @@ -741,9 +752,10 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b /* V */ coeff = coeff_v + index*sizeC/2*sizeC/2; if (cbp.v){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff,sizeC/2,coeff_block_type|1); - decoder_info->bit_count.coeff_v[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_v[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else{ memset(coeff,0,sizeC/2*sizeC/2*sizeof(int16_t)); @@ -759,23 +771,25 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b /* Loop over 4 TUs */ for (index=0;index<4;index++){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); cbp.y = getbits(stream,1); - decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.cbp[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); /* Y */ coeff = coeff_y + index*sizeY/2*sizeY/2; if (cbp.y){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff,sizeY/2,coeff_block_type|0); - decoder_info->bit_count.coeff_y[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_y[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else{ memset(coeff,0,sizeY/2*sizeY/2*sizeof(int16_t)); } } - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); int 
tmp; tmp = getbits(stream,1); if (tmp){ @@ -799,18 +813,21 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b } } } - decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.cbp[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); if (cbp.u){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff_u,sizeC,coeff_block_type|1); - decoder_info->bit_count.coeff_u[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_u[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else memset(coeff_u,0,sizeC*sizeC*sizeof(int16_t)); if (cbp.v){ - bit_start = stream->bitcnt; + bit_start = od_ec_dec_tell(&stream->ec); read_coeff(stream,coeff_v,size/2,coeff_block_type|1); - decoder_info->bit_count.coeff_v[stat_frame_type] += (stream->bitcnt - bit_start); + decoder_info->bit_count.coeff_v[stat_frame_type] += + (od_ec_dec_tell(&stream->ec) - bit_start); } else memset(coeff_v,0,sizeC*sizeC*sizeof(int16_t)); diff --git a/enc/encode_block.c b/enc/encode_block.c index dba8fc2..dc8c693 100644 --- a/enc/encode_block.c +++ b/enc/encode_block.c @@ -2942,6 +2942,9 @@ int process_block(encoder_info_t *encoder_info,int size,int ypos,int xpos,int qp write_data.size = size; write_data.block_context = block_info.block_context; write_data.frame_type = frame_type; + // We can't use encode_rectangular_size here directly, because it is + // never true for I frames. !encode_this_size is what we actually want. + write_data.encode_rectangular_size = !encode_this_size; write_super_mode(stream, &write_data, split_flag); } else{ diff --git a/enc/entenc.c b/enc/entenc.c new file mode 100644 index 0000000..f83250d --- /dev/null +++ b/enc/entenc.c @@ -0,0 +1,706 @@ +/*Daala video codec +Copyright (c) 2001-2013 Daala project contributors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include "entenc.h" + +/*A range encoder. + See entdec.c and the references for implementation details \cite{Mar79,MNW98}. + + @INPROCEEDINGS{Mar79, + author="Martin, G.N.N.", + title="Range encoding: an algorithm for removing redundancy from a digitised + message", + booktitle="Video \& Data Recording Conference", + year=1979, + address="Southampton", + month=Jul, + URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz" + } + @ARTICLE{MNW98, + author="Alistair Moffat and Radford Neal and Ian H.
Witten", + title="Arithmetic Coding Revisited", + journal="{ACM} Transactions on Information Systems", + year=1998, + volume=16, + number=3, + pages="256--294", + month=Jul, + URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf" + }*/ + +/*Takes updated low and range values, renormalizes them so that + 32768 <= rng < 65536 (flushing bytes from low to the pre-carry buffer if + necessary), and stores them back in the encoder context. + low: The new value of low. + rng: The new value of the range.*/ +static void od_ec_enc_normalize(od_ec_enc *enc, + od_ec_window low, unsigned rng) { + int d; + int c; + int s; + c = enc->cnt; + OD_ASSERT(rng <= 65535U); + d = 16 - OD_ILOG_NZ(rng); + s = c + d; + /*TODO: Right now we flush every time we have at least one byte available. + Instead we should use an od_ec_window and flush right before we're about to + shift bits off the end of the window. + For a 32-bit window this is about the same amount of work, but for a 64-bit + window it should be a fair win.*/ + if (s >= 0) { + uint16_t *buf; + uint32_t storage; + uint32_t offs; + unsigned m; + buf = enc->precarry_buf; + storage = enc->precarry_storage; + offs = enc->offs; + if (offs + 2 > storage) { + storage = 2*storage + 2; + buf = (uint16_t *)realloc(buf, sizeof(*buf)*storage); + if (buf == NULL) { + enc->error = -1; + enc->offs = 0; + return; + } + enc->precarry_buf = buf; + enc->precarry_storage = storage; + } + c += 16; + m = (1 << c) - 1; + if (s >= 8) { + OD_ASSERT(offs < storage); + buf[offs++] = (uint16_t)(low >> c); + low &= m; + c -= 8; + m >>= 8; + } + OD_ASSERT(offs < storage); + buf[offs++] = (uint16_t)(low >> c); + s = c + d - 24; + low &= m; + enc->offs = offs; + } + enc->low = low << d; + enc->rng = rng << d; + enc->cnt = s; +} + +/*Initializes the encoder. 
+ size: The initial size of the buffer, in bytes.*/ +void od_ec_enc_init(od_ec_enc *enc, uint32_t size) { + od_ec_enc_reset(enc); + enc->buf = (unsigned char *)malloc(sizeof(*enc->buf)*size); + enc->storage = size; + if (size > 0 && enc->buf == NULL) { + enc->storage = 0; + enc->error = -1; + } + enc->precarry_buf = + (uint16_t *)malloc(sizeof(*enc->precarry_buf)*size); + enc->precarry_storage = size; + if (size > 0 && enc->precarry_buf == NULL) { + enc->precarry_storage = 0; + enc->error = -1; + } +} + +/*Reinitializes the encoder.*/ +void od_ec_enc_reset(od_ec_enc *enc) { + enc->end_offs = 0; + enc->end_window = 0; + enc->nend_bits = 0; + enc->offs = 0; + enc->low = 0; + enc->rng = 0x8000; + /*This is initialized to -9 so that it crosses zero after we've accumulated + one byte + one carry bit.*/ + enc->cnt = -9; + enc->error = 0; +#if OD_MEASURE_EC_OVERHEAD + enc->entropy = 0; + enc->nb_symbols = 0; +#endif +} + +/*Frees the buffers used by the encoder.*/ +void od_ec_enc_clear(od_ec_enc *enc) { + free(enc->precarry_buf); + free(enc->buf); +} + +/*Encodes a symbol given its scaled frequency information. + The frequency information must be discernable by the decoder, assuming it + has read only the previous symbols from the stream. + You can change the frequency information, or even the entire source alphabet, + so long as the decoder can tell from the context of the previously encoded + information that it is supposed to do so as well. + fl: The cumulative frequency of all symbols that come before the one to be + encoded. + fh: The cumulative frequency of all symbols up to and including the one to + be encoded. + Together with fl, this defines the range [fl, fh) in which the decoded + value will fall. + ft: The sum of the frequencies of all the symbols. 
+ This must be at least 16384, and no more than 32768.*/ +static void od_ec_encode(od_ec_enc *enc, + unsigned fl, unsigned fh, unsigned ft) { + od_ec_window l; + unsigned r; + int s; + unsigned d; + unsigned u; + unsigned v; + OD_ASSERT(fl < fh); + OD_ASSERT(fh <= ft); + OD_ASSERT(16384 <= ft); + OD_ASSERT(ft <= 32768U); + l = enc->low; + r = enc->rng; + OD_ASSERT(ft <= r); + s = r - ft >= ft; + ft <<= s; + fl <<= s; + fh <<= s; + d = r - ft; + OD_ASSERT(d < ft); +#if OD_EC_REDUCED_OVERHEAD + { + unsigned e; + e = OD_SUBSATU(2*d, ft); + u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d); + v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d); + } +#else + u = fl + OD_MINI(fl, d); + v = fh + OD_MINI(fh, d); +#endif + r = v - u; + l += u; + od_ec_enc_normalize(enc, l, r); +#if OD_MEASURE_EC_OVERHEAD + enc->entropy -= OD_LOG2((double)(fh - fl)/ft); + enc->nb_symbols++; +#endif +} + +/*Equivalent to od_ec_encode() with ft == 32768. + fl: The cumulative frequency of all symbols that come before the one to be + encoded. + fh: The cumulative frequency of all symbols up to and including the one to + be encoded.*/ +static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) { + od_ec_window l; + unsigned r; + unsigned d; + unsigned u; + unsigned v; + OD_ASSERT(fl < fh); + OD_ASSERT(fh <= 32768U); + l = enc->low; + r = enc->rng; + OD_ASSERT(32768U <= r); + d = r - 32768U; + OD_ASSERT(d < 32768U); +#if OD_EC_REDUCED_OVERHEAD + { + unsigned e; + e = OD_SUBSATU(2*d, 32768U); + u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d); + v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d); + } +#else + u = fl + OD_MINI(fl, d); + v = fh + OD_MINI(fh, d); +#endif + r = v - u; + l += u; + od_ec_enc_normalize(enc, l, r); +#if OD_MEASURE_EC_OVERHEAD + enc->entropy -= OD_LOG2((double)(fh - fl)/32768.); + enc->nb_symbols++; +#endif +} + +/*Encodes a symbol given its frequency information with an arbitrary scale. 
+ This operates just like od_ec_encode(), but does not require that ft be at + least 16384. + fl: The cumulative frequency of all symbols that come before the one to be + encoded. + fh: The cumulative frequency of all symbols up to and including the one to + be encoded. + ft: The sum of the frequencies of all the symbols. + This must be at least 2 and no more than 32768.*/ +static void od_ec_encode_unscaled(od_ec_enc *enc, + unsigned fl, unsigned fh, unsigned ft) { + int s; + OD_ASSERT(fl < fh); + OD_ASSERT(fh <= ft); + OD_ASSERT(2 <= ft); + OD_ASSERT(ft <= 32768U); + s = 15 - OD_ILOG_NZ(ft - 1); + od_ec_encode(enc, fl << s, fh << s, ft << s); +} + +/*Encode a bit that has an fz/ft probability of being a zero. + val: The value to encode (0 or 1). + fz: The probability that val is zero, scaled by ft. + ft: The total probability. + This must be at least 16384 and no more than 32768.*/ +void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned ft) { + od_ec_window l; + unsigned r; + int s; + unsigned v; + OD_ASSERT(0 < fz); + OD_ASSERT(fz < ft); + OD_ASSERT(16384 <= ft); + OD_ASSERT(ft <= 32768U); + l = enc->low; + r = enc->rng; + OD_ASSERT(ft <= r); + s = r - ft >= ft; + ft <<= s; + fz <<= s; + OD_ASSERT(r - ft < ft); +#if OD_EC_REDUCED_OVERHEAD + { + unsigned d; + unsigned e; + d = r - ft; + e = OD_SUBSATU(2*d, ft); + v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d); + } +#else + v = fz + OD_MINI(fz, r - ft); +#endif + if (val) l += v; + r = val ? r - v : v; + od_ec_enc_normalize(enc, l, r); +#if OD_MEASURE_EC_OVERHEAD + enc->entropy -= OD_LOG2((double)(val ? ft - fz : fz)/ft); + enc->nb_symbols++; +#endif +} + +/*Equivalent to od_ec_encode_bool() with ft == 32768. + val: The value to encode (0 or 1). 
+ fz: The probability that val is zero, scaled by 32768.*/ +void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz) { + od_ec_window l; + unsigned r; + unsigned v; + OD_ASSERT(0 < fz); + OD_ASSERT(fz < 32768U); + l = enc->low; + r = enc->rng; + OD_ASSERT(32768U <= r); +#if OD_EC_REDUCED_OVERHEAD + { + unsigned d; + unsigned e; + d = r - 32768U; + e = OD_SUBSATU(2*d, 32768U); + v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d); + } +#else + v = fz + OD_MINI(fz, r - 32768U); +#endif + if (val) l += v; + r = val ? r - v : v; + od_ec_enc_normalize(enc, l, r); +#if OD_MEASURE_EC_OVERHEAD + enc->entropy -= OD_LOG2((double)(val ? 32768 - fz : fz)/32768.); + enc->nb_symbols++; +#endif +} + +/*Encodes a symbol given a cumulative distribution function (CDF) table. + s: The index of the symbol to encode. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and the last value + must be at least 16384, and no more than 32768. + nsyms: The number of symbols in the alphabet. + This should be at most 16.*/ +void od_ec_encode_cdf(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms) { + OD_ASSERT(s >= 0); + OD_ASSERT(s < nsyms); + od_ec_encode(enc, s > 0 ? cdf[s - 1] : 0, cdf[s], cdf[nsyms - 1]); +} + +/*Equivalent to od_ec_encode_cdf() with cdf[nsyms - 1] == 32768U. + s: The index of the symbol to encode. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and the last value + must be exactly 32768. + nsyms: The number of symbols in the alphabet. + This should be at most 16.*/ +void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms) { + (void)nsyms; + OD_ASSERT(s >= 0); + OD_ASSERT(s < nsyms); + OD_ASSERT(cdf[nsyms - 1] == 32768U); + od_ec_encode_q15(enc, s > 0 ?
cdf[s - 1] : 0, cdf[s]); +} + +/*Encodes a symbol given a cumulative distribution function (CDF) table. + s: The index of the symbol to encode. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and the last value + must be at least 2, and no more than 32768. + nsyms: The number of symbols in the alphabet. + This should be at most 16.*/ +void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms) { + OD_ASSERT(s >= 0); + OD_ASSERT(s < nsyms); + od_ec_encode_unscaled(enc, s > 0 ? cdf[s - 1] : 0, cdf[s], cdf[nsyms - 1]); +} + +/*Equivalent to od_ec_encode_cdf() with cdf[nsyms - 1] == 1 << ftb. + s: The index of the symbol to encode. + cdf: The CDF, such that symbol s falls in the range + [s > 0 ? cdf[s - 1] : 0, cdf[s]). + The values must be monotonically non-decreasing, and the last value + must be exactly 1 << ftb. + nsyms: The number of symbols in the alphabet. + This should be at most 16. + ftb: The number of bits of precision in the cumulative distribution. + This must be no more than 15.*/ +void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms, unsigned ftb) { + (void)nsyms; + OD_ASSERT(s >= 0); + OD_ASSERT(s < nsyms); + OD_ASSERT(ftb <= 15); + OD_ASSERT(cdf[nsyms - 1] == 1U << ftb); + od_ec_encode_q15(enc, + s > 0 ? cdf[s - 1] << (15 - ftb) : 0, cdf[s] << (15 - ftb)); +} + +/*Encodes a raw unsigned integer in the stream. + fl: The integer to encode. + ft: The number of integers that can be encoded (one more than the max). 
+ This must be at least 2, and no more than 2**29.*/ +void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) { + OD_ASSERT(ft >= 2); + OD_ASSERT(fl < ft); + OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS)); + if (ft > 1U << OD_EC_UINT_BITS) { + int ft1; + int ftb; + ft--; + ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS; + ft1 = (int)(ft >> ftb) + 1; + od_ec_encode_cdf_q15(enc, (int)(fl >> ftb), OD_UNIFORM_CDF_Q15(ft1), ft1); + od_ec_enc_bits(enc, fl & (((uint32_t)1 << ftb) - 1), ftb); + } + else od_ec_encode_cdf_q15(enc, (int)fl, OD_UNIFORM_CDF_Q15(ft), (int)ft); +} + +/*Encodes a sequence of raw bits in the stream. + fl: The bits to encode. + ftb: The number of bits to encode. + This must be between 0 and 25, inclusive.*/ +void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb) { + od_ec_window end_window; + int nend_bits; + OD_ASSERT(ftb <= 25); + OD_ASSERT(fl < (uint32_t)1 << ftb); +#if OD_MEASURE_EC_OVERHEAD + enc->entropy += ftb; +#endif + end_window = enc->end_window; + nend_bits = enc->nend_bits; + if (nend_bits + ftb > OD_EC_WINDOW_SIZE) { + unsigned char *buf; + uint32_t storage; + uint32_t end_offs; + buf = enc->buf; + storage = enc->storage; + end_offs = enc->end_offs; + if (end_offs + (OD_EC_WINDOW_SIZE >> 3) >= storage) { + unsigned char *new_buf; + uint32_t new_storage; + new_storage = 2*storage + (OD_EC_WINDOW_SIZE >> 3); + new_buf = (unsigned char *)malloc(sizeof(*new_buf)*new_storage); + if (new_buf == NULL) { + enc->error = -1; + enc->end_offs = 0; + return; + } + OD_COPY(new_buf + new_storage - end_offs, + buf + storage - end_offs, end_offs); + storage = new_storage; + free(buf); + enc->buf = buf = new_buf; + enc->storage = storage; + } + do { + OD_ASSERT(end_offs < storage); + buf[storage - ++end_offs] = (unsigned char)end_window; + end_window >>= 8; + nend_bits -= 8; + } + while (nend_bits >= 8); + enc->end_offs = end_offs; + } + OD_ASSERT(nend_bits + ftb <= OD_EC_WINDOW_SIZE); + end_window |= (od_ec_window)fl << nend_bits; + 
nend_bits += ftb; + enc->end_window = end_window; + enc->nend_bits = nend_bits; +} + +/*Overwrites a few bits at the very start of an existing stream, after they + have already been encoded. + This makes it possible to have a few flags up front, where it is easy for + decoders to access them without parsing the whole stream, even if their + values are not determined until late in the encoding process, without having + to buffer all the intermediate symbols in the encoder. + In order for this to work, at least nbits bits must have already been encoded + using probabilities that are an exact power of two. + The encoder can verify the number of encoded bits is sufficient, but cannot + check this latter condition. + val: The bits to encode (in the nbits least significant bits). + They will be decoded in order from most-significant to least. + nbits: The number of bits to overwrite. + This must be no more than 8.*/ +void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) { + int shift; + unsigned mask; + OD_ASSERT(nbits >= 0); + OD_ASSERT(nbits <= 8); + OD_ASSERT(val < 1U << nbits); + shift = 8 - nbits; + mask = ((1U << nbits) - 1) << shift; + if (enc->offs > 0) { + /*The first byte has been finalized.*/ + enc->precarry_buf[0] = + (uint16_t)((enc->precarry_buf[0] & ~mask) | val << shift); + } + else if (9 + enc->cnt + (enc->rng == 0x8000) > nbits) { + /*The first byte has yet to be output.*/ + enc->low = (enc->low & ~((od_ec_window)mask << (16 + enc->cnt))) | + (od_ec_window)val << (16 + enc->cnt + shift); + } + /*The encoder hasn't even encoded nbits of data yet.*/ + else enc->error = -1; +} + +#if OD_MEASURE_EC_OVERHEAD +# include <stdio.h> +#endif + +/*Indicates that there are no more symbols to encode. + All remaining output bytes are flushed to the output buffer. + od_ec_enc_reset() should be called before using the encoder again. + nbytes: Returns the size of the encoded data in the returned buffer.
+ Return: A pointer to the start of the final buffer, or NULL if there was an + encoding error.*/ +unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) { + unsigned char *out; + uint32_t storage; + uint16_t *buf; + uint32_t offs; + uint32_t end_offs; + int nend_bits; + od_ec_window m; + od_ec_window e; + od_ec_window l; + unsigned r; + int c; + int s; + if (enc->error) return NULL; +#if OD_MEASURE_EC_OVERHEAD + { + uint32_t tell; + /* Don't count the 1 bit we lose to raw bits as overhead. */ + tell = od_ec_enc_tell_frac(enc)/8.-1; + fprintf(stderr, "overhead: %f%%\n", 100*(tell-enc->entropy)/enc->entropy); + fprintf(stderr, "efficiency: %f bits/symbol\n", + (double)tell/enc->nb_symbols); + } +#endif + /*We output the minimum number of bits that ensures that the symbols encoded + thus far will be decoded correctly regardless of the bits that follow.*/ + l = enc->low; + r = enc->rng; + c = enc->cnt; + s = 9; + m = 0x7FFF; + e = (l + m) & ~m; + while ((e | m) >= l + r) { + s++; + m >>= 1; + e = (l + m) & ~m; + } + s += c; + offs = enc->offs; + buf = enc->precarry_buf; + if (s > 0) { + unsigned n; + storage = enc->precarry_storage; + if (offs + ((s + 7) >> 3) > storage) { + storage = storage*2 + ((s + 7) >> 3); + buf = (uint16_t *)realloc(buf, sizeof(*buf)*storage); + if (buf == NULL) { + enc->error = -1; + return NULL; + } + enc->precarry_buf = buf; + enc->precarry_storage = storage; + } + n = (1 << (c + 16)) - 1; + do { + OD_ASSERT(offs < storage); + buf[offs++] = (uint16_t)(e >> (c + 16)); + e &= n; + s -= 8; + c -= 8; + n >>= 8; + } + while (s > 0); + } + /*Make sure there's enough room for the entropy-coded bits and the raw + bits.*/ + out = enc->buf; + storage = enc->storage; + end_offs = enc->end_offs; + e = enc->end_window; + nend_bits = enc->nend_bits; + s = -s; + c = OD_MAXI((nend_bits - s + 7) >> 3, 0); + if (offs + end_offs + c > storage) { + storage = offs + end_offs + c; + out = (unsigned char *)realloc(out, sizeof(*out)*storage); + if (out == 
NULL) { + enc->error = -1; + return NULL; + } + OD_MOVE(out + storage - end_offs, out + enc->storage - end_offs, end_offs); + enc->buf = out; + enc->storage = storage; + } + /*If we have buffered raw bits, flush them as well.*/ + while (nend_bits > s) { + OD_ASSERT(end_offs < storage); + out[storage - ++end_offs] = (unsigned char)e; + e >>= 8; + nend_bits -= 8; + } + *nbytes = offs + end_offs; + /*Perform carry propagation.*/ + OD_ASSERT(offs + end_offs <= storage); + out = out + storage - (offs + end_offs); + c = 0; + end_offs = offs; + while (offs-- > 0) { + c = buf[offs] + c; + out[offs] = (unsigned char)c; + c >>= 8; + } + /*Add any remaining raw bits to the last byte. + There is guaranteed to be enough room, because nend_bits <= s.*/ + OD_ASSERT(nend_bits <= 0 || end_offs > 0); + if (nend_bits > 0) out[end_offs - 1] |= (unsigned char)e; + /*Note: Unless there's an allocation error, if you keep encoding into the + current buffer and call this function again later, everything will work + just fine (you won't get a new packet out, but you will get a single + buffer with the new data appended to the old). + However, this function is O(N) where N is the amount of data coded so far, + so calling it more than once for a given packet is a bad idea.*/ + return out; +} + +/*Returns the number of bits "used" by the encoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Warning: The value returned by this function can decrease compared to an + earlier call, even after encoding more data, if there is an encoding error + (i.e., a failure to allocate enough space for the output buffer). + Return: The number of bits. 
+ This will always be slightly larger than the exact value (i.e., all + rounding error is in the positive direction).*/ +int od_ec_enc_tell(od_ec_enc *enc) { + /*The 10 here counteracts the offset of -9 baked into cnt, and adds 1 extra + bit, which we reserve for terminating the stream.*/ + return (enc->offs + enc->end_offs)*8 + enc->cnt + enc->nend_bits + 10; +} + +/*Returns the number of bits "used" by the encoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Warning: The value returned by this function can decrease compared to an + earlier call, even after encoding more data, if there is an encoding error + (i.e., a failure to allocate enough space for the output buffer). + Return: The number of bits scaled by 2**OD_BITRES. + This will always be slightly larger than the exact value (i.e., all + rounding error is in the positive direction).*/ +uint32_t od_ec_enc_tell_frac(od_ec_enc *enc) { + return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng); +} + +/*Saves an entropy coder checkpoint to dst. + This allows an encoder to reverse a series of entropy coder + decisions if it decides that the information would have been + better coded some other way.*/ +void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src) { + OD_COPY(dst, src, 1); +} + +/*Restores an entropy coder checkpoint saved by od_ec_enc_checkpoint. + This can only be used to restore from checkpoints earlier in the target + state's history: you cannot switch backwards and forwards or otherwise + switch to a state which isn't a causal ancestor of the current state.
+ Restore is also incompatible with patching the initial bits, as the + changes will remain in the restored version.*/ +void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src) { + unsigned char *buf; + uint32_t storage; + uint16_t *precarry_buf; + uint32_t precarry_storage; + OD_ASSERT(dst->storage >= src->storage); + OD_ASSERT(dst->precarry_storage >= src->precarry_storage); + buf = dst->buf; + storage = dst->storage; + precarry_buf = dst->precarry_buf; + precarry_storage = dst->precarry_storage; + OD_COPY(dst, src, 1); + dst->buf = buf; + dst->storage = storage; + dst->precarry_buf = precarry_buf; + dst->precarry_storage = precarry_storage; +} diff --git a/enc/entenc.h b/enc/entenc.h new file mode 100644 index 0000000..e2b1ae0 --- /dev/null +++ b/enc/entenc.h @@ -0,0 +1,105 @@ +/*Daala video codec +Copyright (c) 2001-2013 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#if !defined(_entenc_H) +# define _entenc_H (1) +# include +# include "entcode.h" +typedef struct od_ec_enc od_ec_enc; + +#define OD_MEASURE_EC_OVERHEAD (0) + +/*The entropy encoder context.*/ +struct od_ec_enc { + /*Buffered output. + This contains only the raw bits until the final call to od_ec_enc_done(), + where all the arithmetic-coded data gets prepended to it.*/ + unsigned char *buf; + /*The size of the buffer.*/ + uint32_t storage; + /*The offset at which the last byte containing raw bits was written.*/ + uint32_t end_offs; + /*Bits that will be read from/written at the end.*/ + od_ec_window end_window; + /*Number of valid bits in end_window.*/ + int nend_bits; + /*A buffer for output bytes with their associated carry flags.*/ + uint16_t *precarry_buf; + /*The size of the pre-carry buffer.*/ + uint32_t precarry_storage; + /*The offset at which the next entropy-coded byte will be written.*/ + uint32_t offs; + /*The low end of the current range.*/ + od_ec_window low; + /*The number of values in the current range.*/ + uint16_t rng; + /*The number of bits of data in the current value.*/ + int16_t cnt; + /*Nonzero if an error occurred.*/ + int error; +#if OD_MEASURE_EC_OVERHEAD + double entropy; + int nb_symbols; +#endif +}; + +/*See entenc.c for further documentation.*/ + +void od_ec_enc_init(od_ec_enc *enc, uint32_t size) OD_ARG_NONNULL(1); +void od_ec_enc_reset(od_ec_enc *enc) OD_ARG_NONNULL(1); +void od_ec_enc_clear(od_ec_enc *enc) 
OD_ARG_NONNULL(1); + +void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned _ft) + OD_ARG_NONNULL(1); +void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz_q15) + OD_ARG_NONNULL(1); +void od_ec_encode_cdf(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3); +void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3); +void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3); +void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s, + const uint16_t *cdf, int nsyms, unsigned ftb) + OD_ARG_NONNULL(1) OD_ARG_NONNULL(3); + +void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) +OD_ARG_NONNULL(1); /* NOTE(review): unlike its neighbours, this continuation line has no leading indent */ + +void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb) + OD_ARG_NONNULL(1); /* putbits() in enc/putbits.h passes its bit count n as ftb */ + +void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) + OD_ARG_NONNULL(1); +OD_WARN_UNUSED_RESULT unsigned char *od_ec_enc_done(od_ec_enc *enc, + uint32_t *nbytes) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2); /* flush_all_bits() in enc/putbits.c writes *nbytes bytes starting at the returned pointer */ + +OD_WARN_UNUSED_RESULT int od_ec_enc_tell(od_ec_enc *enc) OD_ARG_NONNULL(1); /* wrapped by get_bit_pos() in enc/putbits.h */ +OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(od_ec_enc *enc) + OD_ARG_NONNULL(1); + +void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src); +void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src); /* checkpoint and rollback both take (dst, src); read_stream_pos() and write_stream_pos() in enc/putbits.h forward to them */ + +#endif diff --git a/enc/mainenc.c b/enc/mainenc.c index aaec81c..7e94074 100644 --- a/enc/mainenc.c +++ b/enc/mainenc.c @@ -171,11 +171,7 @@ int main(int argc, char **argv) /* Initialize main bit stream */ stream_t stream; - stream.bitstream = (uint8_t *)malloc(MAX_BUFFER_SIZE * sizeof(uint8_t)); - stream.bitbuf = 0; - stream.bitrest = 32; - stream.bytepos = 0; - stream.bytesize = MAX_BUFFER_SIZE; + od_ec_enc_init(&stream, MAX_BUFFER_SIZE); /* takes the place of the manual malloc and field setup removed just above */ /* Configure encoder */ encoder_info.params = params; @@ -569,7 +565,7 @@ int main(int argc, char **argv) fflush(stdout); /* Write
compressed bits for this frame to file */ - flush_bytebuf(&stream, strfile); + flush_all_bits(&stream, strfile); /* called at this per-frame write point now; the later flush_all_bits call is removed in the next hunk */ if (reconfile){ /* Write output frame */ @@ -601,7 +597,6 @@ int main(int argc, char **argv) } - flush_all_bits(&stream, strfile); bit_rate_in_kbps = 0.001*params->frame_rate*(double)acc_num_bits/num_encoded_frames; /* Finised encoding sequence */ @@ -651,7 +646,7 @@ int main(int argc, char **argv) { fclose(reconfile); } - free(stream.bitstream); + od_ec_enc_clear(&stream); /* takes the place of free(stream.bitstream) */ free(encoder_info.deblock_data); delete_config_params(params); return 0; diff --git a/enc/putbits.c b/enc/putbits.c index 85345f1..427d2f9 100644 --- a/enc/putbits.c +++ b/enc/putbits.c @@ -30,109 +30,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "global.h" #include "putbits.h" -static unsigned int mask[33] = { - 0x00000000,0x00000001,0x00000003,0x00000007, - 0x0000000f,0x0000001f,0x0000003f,0x0000007f, - 0x000000ff,0x000001ff,0x000003ff,0x000007ff, - 0x00000fff,0x00001fff,0x00003fff,0x00007fff, - 0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff, - 0x000fffff,0x001fffff,0x003fffff,0x007fffff, - 0x00ffffff,0x01ffffff,0x03ffffff,0x07ffffff, - 0x0fffffff,0x1fffffff,0x3fffffff,0x7fffffff, - 0xffffffff}; - -void flush_bytebuf(stream_t *str, FILE *outfile) -{ - if (outfile) - { - if (fwrite(str->bitstream,sizeof(unsigned char),str->bytepos,outfile) != str->bytepos) - { - fatalerror("Problem writing bitstream to file."); - } - } - str->bytepos = 0; -} - - void flush_all_bits(stream_t *str, FILE *outfile) { + unsigned char *buf; + uint32_t frame_bytes; /* byte count filled in by od_ec_enc_done() below */ int i; - int bytes = 4 - str->bitrest/8; - - printf("final flush: bytes=%4d\n",bytes); - if ((str->bytepos+bytes) > str->bytesize) - { - flush_bytebuf(str,outfile); - } - for (i = 0; i < bytes; i++) - { - str->bitstream[str->bytepos++] = (str->bitbuf >> (24-i*8)) & 0xff; - } - + buf = od_ec_enc_done(str, &frame_bytes); /* NOTE(review): buf goes to fwrite() unchecked; confirm od_ec_enc_done() cannot return NULL here */ if (outfile) { - if (fwrite(str->bitstream,sizeof(unsigned char),str->bytepos,outfile) !=
str->bytepos) + uint8_t frame_bytes_buf[4]; /* frame_bytes serialised as 4 bytes, most significant byte first */ + for (i = 0; i < 4; i++) + { + frame_bytes_buf[i] = (uint8_t)(frame_bytes >> (24 - i*8)); + } + if (fwrite(frame_bytes_buf, sizeof(frame_bytes_buf), 1, outfile) != 1 /* the 4 byte length goes out first, then the frame_bytes payload bytes */ + || fwrite(buf, sizeof(*buf), frame_bytes, outfile) != frame_bytes) { fatalerror("Problem writing bitstream to file."); } } - str->bytepos = 0; -} - - -void flush_bitbuf(stream_t *str) -{ - if ((str->bytepos+4) > str->bytesize) - { - fatalerror("Run out of bits in stream buffer."); - } - str->bitstream[str->bytepos++] = (str->bitbuf >> 24) & 0xff; - str->bitstream[str->bytepos++] = (str->bitbuf >> 16) & 0xff; - str->bitstream[str->bytepos++] = (str->bitbuf >> 8) & 0xff; - str->bitstream[str->bytepos++] = str->bitbuf & 0xff; - str->bitbuf = 0; - str->bitrest = 32; -} - -void putbits(unsigned int n, unsigned int val, stream_t *str) -{ - unsigned int rest; - - if (n <= str->bitrest) - { - str->bitbuf |= ((val & mask[n]) << (str->bitrest-n)); - str->bitrest -= n; - } - else - { - rest = n-str->bitrest; - str->bitbuf |= (val >> rest) & mask[n-rest]; - flush_bitbuf(str); - str->bitbuf |= (val & mask[rest]) << (32-rest); - str->bitrest -= rest; - } -} - -int get_bit_pos(stream_t *str){ - int bitpos = 8*str->bytepos + (32 - str->bitrest); - return bitpos; -} - -void write_stream_pos(stream_t *stream, stream_pos_t *stream_pos){ - stream->bitrest = stream_pos->bitrest; - stream->bytepos = stream_pos->bytepos; - stream->bitbuf = stream_pos->bitbuf; -} - -void read_stream_pos(stream_pos_t *stream_pos, stream_t *stream){ - stream_pos->bitrest = stream->bitrest; - stream_pos->bytepos = stream->bytepos; - stream_pos->bitbuf = stream->bitbuf; -} - -void copy_stream(stream_t *str1, stream_t *str2){ - str1->bitrest = str2->bitrest; - str1->bytepos = str2->bytepos; - str1->bitbuf = str2->bitbuf; - memcpy(&(str1->bitstream[0]),&(str2->bitstream[0]),str2->bytepos*sizeof(uint8_t)); + od_ec_enc_reset(str); /* sits outside the if (outfile) block, so it runs whether or not a file was given */ } diff --git a/enc/putbits.h b/enc/putbits.h index 855bde7..5dba40e 100644
--- a/enc/putbits.h +++ b/enc/putbits.h @@ -31,32 +31,42 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include "entenc.h" /* presumably declares struct od_ec_enc and the od_ec_enc_* calls used below; confirm */ -typedef struct +typedef struct od_ec_enc stream_t; /* stream_t is now an alias for struct od_ec_enc */ + +typedef struct od_ec_enc stream_pos_t; /* same underlying struct type as stream_t */ + +void flush_all_bits(stream_t *str, FILE *outfile); + +static inline uint32_t bitreverse(uint32_t val) /* Returns val with its 32 bits in reverse order: swaps 16 bit halves, then bytes, nibbles, bit pairs and single bits. */ { - uint32_t bytesize; //Buffer size - typically maximum compressed frame size - uint32_t bytepos; //Byte position in bitstream - uint8_t *bitstream; //Compressed bit stream - uint32_t bitbuf; //Recent bits not written the bitstream yet - uint32_t bitrest; //Empty bits in bitbuf -} stream_t; - -typedef struct + val = ((val >> 16) & 0x0000FFFFU) | ((val <<16) & 0xFFFF0000U); + val = ((val >> 8) & 0x00FF00FFU) | ((val << 8) & 0xFF00FF00U); + val = ((val >> 4) & 0x0F0F0F0FU) | ((val << 4) & 0xF0F0F0F0U); + val = ((val >> 2) & 0x33333333U) | ((val << 2) & 0xCCCCCCCCU); + return ((val >> 1) & 0x55555555U) | ((val << 1) & 0xAAAAAAAAUL); /* NOTE(review): only this mask carries a UL suffix; the others use U */ +} + +static inline void putbits(unsigned int n,unsigned int val,stream_t *str) /* Writes the low n bits of val through od_ec_enc_bits(), bit order mirrored. */ { - uint32_t bytepos; //Byte position in bitstream - uint32_t bitbuf; //Recent bits not written the bitstream yet - uint32_t bitrest; //Empty bits in bitbuf -} stream_pos_t; + OD_ASSERT(n > 0); /* n == 0 would make the shift count below equal to 32 */ + od_ec_enc_bits(str, bitreverse(val << (32 - n)), n); /* the shift moves the low n bits of val to the top of the word and bitreverse() brings them back to the low n bits in mirrored order (assumes unsigned int is 32 bits wide; TODO confirm) */ +} -void flush_all_bits(stream_t *str, FILE *outfile); -void putbits(unsigned int n,unsigned int val,stream_t *str); -void flush_bytebuf(stream_t *str, FILE *outfile); -void flush_bitbuf(stream_t *str); -int get_bit_pos(stream_t *str); -unsigned int leading_zeros(unsigned int code); - -void write_stream_pos(stream_t *stream, stream_pos_t *stream_pos); -void read_stream_pos(stream_pos_t *stream_pos, stream_t *stream); -void copy_stream(stream_t *str1, stream_t *str2); +static inline int get_bit_pos(stream_t *str) /* Returns whatever od_ec_enc_tell() reports for str. */ +{ + return od_ec_enc_tell(str); +} + +static inline void write_stream_pos(stream_t *stream, stream_pos_t *stream_pos) /* Forwards to od_ec_enc_rollback() with stream as first argument and stream_pos as second. */ +{ + od_ec_enc_rollback(stream,
stream_pos); +} + +static inline void read_stream_pos(stream_pos_t *stream_pos, stream_t *stream) /* Forwards to od_ec_enc_checkpoint() with stream_pos as first argument and stream as second. */ +{ + od_ec_enc_checkpoint(stream_pos, stream); +} #endif diff --git a/enc/write_bits.c b/enc/write_bits.c index a7e2119..d745b4e 100644 --- a/enc/write_bits.c +++ b/enc/write_bits.c @@ -371,8 +371,10 @@ void write_super_mode(stream_t *stream,write_data_t *write_data, int split_flag) } else{ /* Split flag = 0 */ - if (size > MIN_BLOCK_SIZE || split_flag==1) + if (!write_data->encode_rectangular_size /* new guard: the split flag bit is not written when encode_rectangular_size is nonzero */ + && (size > MIN_BLOCK_SIZE || split_flag==1)){ putbits(1,split_flag,stream); + } } }