diff --git a/Makefile b/Makefile
index 516670e..126d98b 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ endif
 COMMON_SOURCES = \
 	common/common_block.c \
 	common/common_frame.c \
+	common/entcode.c \
 	common/transform.c \
 	common/intra_prediction.c \
 	common/inter_prediction.c \
@@ -31,6 +32,7 @@ COMMON_SOURCES = \
 ENCODER_SOURCES = \
 	enc/encode_block.c \
 	enc/encode_frame.c \
+	enc/entenc.c \
 	enc/mainenc.c \
 	enc/putbits.c \
 	enc/putvlc.c \
@@ -41,6 +43,7 @@ ENCODER_SOURCES = \
 
 DECODER_SOURCES = \
 	dec/decode_block.c \
+	dec/entdec.c \
 	dec/getbits.c \
 	dec/getvlc.c \
 	dec/maindec.c \
diff --git a/common/entcode.c b/common/entcode.c
new file mode 100644
index 0000000..28faf87
--- /dev/null
+++ b/common/entcode.c
@@ -0,0 +1,91 @@
+/*Daala video codec
+Copyright (c) 2001-2012 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "entcode.h"
+
+/*Q15 CDFs for the uniform probability distributions of sizes 2 through 16
+   (inclusive), stored back to back; each one ends at 32768 (1.0 in Q15).*/
+const uint16_t OD_UNIFORM_CDFS_Q15[135] = {
+  16384, 32768,
+  10923, 21845, 32768,
+   8192, 16384, 24576, 32768,
+   6554, 13107, 19661, 26214, 32768,
+   5461, 10923, 16384, 21845, 27307, 32768,
+   4681,  9362, 14043, 18725, 23406, 28087, 32768,
+   4096,  8192, 12288, 16384, 20480, 24576, 28672, 32768,
+   3641,  7282, 10923, 14564, 18204, 21845, 25486, 29127, 32768,
+   3277,  6554,  9830, 13107, 16384, 19661, 22938, 26214, 29491, 32768,
+   2979,  5958,  8937, 11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768,
+   2731,  5461,  8192, 10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037,
+  32768,
+   2521,  5041,  7562, 10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727,
+  30247, 32768,
+   2341,  4681,  7022,  9362, 11703, 14043, 16384, 18725, 21065, 23406, 25746,
+  28087, 30427, 32768,
+   2185,  4369,  6554,  8738, 10923, 13107, 15292, 17476, 19661, 21845, 24030,
+  26214, 28399, 30583, 32768,
+   2048,  4096,  6144,  8192, 10240, 12288, 14336, 16384, 18432, 20480, 22528,
+  24576, 26624, 28672, 30720, 32768
+};
+
+/*Given the current total integer number of bits used and the current value of
+   rng, computes the fractional number of bits used to OD_BITRES precision.
+  This is used by od_ec_enc_tell_frac() and od_ec_dec_tell_frac().
+  nbits_total: The number of whole bits currently used, i.e., the value
+                returned by od_ec_enc_tell() or od_ec_dec_tell().
+  rng: The current value of rng from either the encoder or decoder state.
+  Return: The number of bits scaled by 2**OD_BITRES.
+          This will always be slightly larger than the exact value (i.e., all
+           rounding error is in the positive direction).*/
+uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
+  uint32_t nbits;
+  int l;
+  int i;
+  /*To handle the non-integral number of bits still left in the encoder/decoder
+     state, we compute the worst-case number of bits of val that must be
+     encoded to ensure that the value is inside the range for any possible
+     subsequent bits.
+    The computation here is independent of val itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     od_ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of val are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1 << n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     encoder or decoder claims to have used 1 bit.*/
+  nbits = nbits_total << OD_BITRES; /*Whole bits in 2**-OD_BITRES units.*/
+  l = 0;
+  for (i = OD_BITRES; i-- > 0;) { /*One fractional bit per pass.*/
+    int b;
+    rng = rng*rng >> 15; /*Squaring doubles log2(rng).*/
+    b = (int)(rng >> 16); /*Carry bit; 0 or 1 if rng < 2**16 (TODO confirm).*/
+    l = l << 1 | b; /*Collect the carries, most significant first.*/
+    rng >>= b; /*Halve after a carry, ready for the next squaring.*/
+  }
+  return nbits - l; /*Remove the fractional part accumulated in l.*/
+}
diff --git a/common/entcode.h b/common/entcode.h
new file mode 100644
index 0000000..eed6c6f
--- /dev/null
+++ b/common/entcode.h
@@ -0,0 +1,120 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_entcode_H)
+# define _entcode_H (1)
+# include <limits.h>
+# include <stddef.h>
+# include <stdint.h>
+# include "odintrin.h"
+
+/*Set this flag 1 to enable a "reduced overhead" version of the entropy coder.
+  This uses a partition function that more accurately follows the input
+   probability estimates at the expense of some additional CPU cost (though
+   still an order of magnitude less than a full division).
+
+  In classic arithmetic coding, the partition function maps a value x in the
+   range [0, ft] to a value y in [0, r] with 0 < ft <= r via
+    y = x*r/ft.
+  Any deviation from this value increases coding inefficiency.
+
+  To avoid divisions, we require ft <= r < 2*ft (enforcing it by shifting up
+   ft if necessary), and replace that function with
+    y = x + OD_MINI(x, r - ft).
+  This counts values of x smaller than r - ft double compared to values larger
+   than r - ft, which over-estimates the probability of symbols at the start of
+   the alphabet, and under-estimates the probability of symbols at the end of
+   the alphabet.
+  The overall coding inefficiency assuming accurate probability models and
+   independent symbols is in the 1% range, which is similar to that of CABAC.
+
+  To reduce overhead even further, we split this into two cases:
+  1) r - ft > ft - (r - ft).
+     That is, we have more values of x that are double-counted than
+      single-counted.
+     In this case, we still double-count the first 2*r - 3*ft values of x, but
+      after that we alternate between single-counting and double-counting for
+      the rest.
+  2) r - ft < ft - (r - ft).
+     That is, we have more values of x that are single-counted than
+      double-counted.
+     In this case, we alternate between single-counting and double-counting for
+      the first 2*(r - ft) values of x, and single-count the rest.
+  For two equiprobable symbols in different places in the alphabet, this
+   reduces the maximum ratio of over-estimation to under-estimation from 2:1
+   for the previous partition function to either 4:3 or 3:2 (for each of the
+   two cases above, respectively), assuming symbol probabilities significantly
+   greater than 1/32768.
+  That reduces the worst-case per-symbol overhead from 1 bit to 0.58 bits.
+
+  The resulting function is
+    e = OD_MAXI(2*r - 3*ft, 0);
+    y = x + OD_MINI(x, e) + OD_MINI(OD_MAXI(x - e, 0) >> 1, r - ft).
+  Here, e is a value that is greater than 0 in case 1, and 0 in case 2.
+  This function is about 3 times as expensive to evaluate as the high-overhead
+   version, but still an order of magnitude cheaper than a division, since it
+   is composed only of very simple operations.
+  Because we want to fit in 16-bit registers and must use unsigned values to do
+   so, we use saturating subtraction to enforce the maximums with 0.
+
+  Enabling this reduces the measured overhead in ectest from 0.805% to 0.621%
+   (vs. 0.022% for the division-based partition function with r much greater
+   than ft).
+  It improves performance on ntt-short-1 by about 0.3%.*/
+# define OD_EC_REDUCED_OVERHEAD (0)
+
+/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic
+   on a larger type, you can speed up the decoder by using it here.*/
+typedef uint32_t od_ec_window;
+
+# define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window)*CHAR_BIT)
+
+/*Unsigned subtraction with unsigned saturation.
+  This implementation of the macro is intentionally chosen to increase the
+   number of common subexpressions in the reduced-overhead partition function.
+  This matters for C code, but it would not for hardware with a saturating
+   subtraction instruction.*/
+#define OD_SUBSATU(a, b) ((a) - OD_MINI(a, b))
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+# define OD_EC_UINT_BITS (4)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+   3 => 1/8th bits.*/
+# define OD_BITRES (3)
+
+extern const uint16_t OD_UNIFORM_CDFS_Q15[135];
+
+/*Returns a Q15 CDF for a uniform probability distribution of the given size.
+  n: The size of the distribution; at least 2 and no more than 16.
+     It is evaluated twice, so it must not have side effects.*/
+# define OD_UNIFORM_CDF_Q15(n) \
+   (OD_UNIFORM_CDFS_Q15 + ((n)*((n) - 1) >> 1) - 1)
+
+/*See entcode.c for further documentation.*/
+
+OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total,
+ uint32_t rng);
+
+#endif
diff --git a/common/global.h b/common/global.h
index c59e814..29e1655 100644
--- a/common/global.h
+++ b/common/global.h
@@ -40,7 +40,7 @@ static inline void fatalerror(char error_text[])
     fprintf(stderr,"Run-time error...\n");
     fprintf(stderr,"%s\n",error_text);
     fprintf(stderr,"...now exiting to system...\n");
-    exit(1);
+    abort();
 }
 
 #ifndef max
diff --git a/common/odintrin.h b/common/odintrin.h
new file mode 100644
index 0000000..c613959
--- /dev/null
+++ b/common/odintrin.h
@@ -0,0 +1,329 @@
+/*Daala video codec
+Copyright (c) 2003-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+/*Some common macros for potential platform-specific optimization.*/
+#if !defined(_odintrin_H)
+# define _odintrin_H (1)
+
+# if defined(_MSC_VER)
+#  define _USE_MATH_DEFINES
+# endif
+
+# include <math.h>
+# include <limits.h>
+# include <string.h>
+# include "global.h"
+# include "simd.h"
+
+# if defined(__GNUC__) && defined(__GNUC_MINOR__) \
+ && defined(__GNUC_PATCHLEVEL__)
+#  define OD_GNUC_PREREQ(maj, min, pat) \
+ ((__GNUC__ << 16) + (__GNUC_MINOR__ << 8) + __GNUC_PATCHLEVEL__ >= \
+ ((maj) << 16) + ((min) << 8) + (pat))
+# else /*No gcc version macros: every version check fails.*/
+#  define OD_GNUC_PREREQ(maj, min, pat) (0)
+# endif
+
+# if OD_GNUC_PREREQ(3, 4, 0)
+#  define OD_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+# else
+#  define OD_WARN_UNUSED_RESULT
+# endif
+# if OD_GNUC_PREREQ(3, 4, 0)
+#  define OD_ARG_NONNULL(x) __attribute__((__nonnull__(x)))
+# else
+#  define OD_ARG_NONNULL(x)
+# endif
+
+# if defined(OD_ENABLE_ASSERTIONS)
+
+#  define OD_M2STR_WRAPPER(_m) #_m
+#  define OD_M2STR(_m) OD_M2STR_WRAPPER(_m)
+
+#  define OD_FATAL(_str) \
+ (fatalerror("Fatal (internal) error in " \
+  __FILE__ ", line " OD_M2STR(__LINE__) ": " _str))
+
+#  define OD_ASSERT(_cond) \
+  do { \
+    if (!(_cond)) { \
+      OD_FATAL("assertion failed: " # _cond); \
+    } \
+  } \
+  while (0)
+
+#  define OD_ASSERT2(_cond, _message) \
+  do { \
+    if (!(_cond)) { \
+      OD_FATAL("assertion failed: " # _cond "\n" _message); \
+    } \
+  } \
+  while (0)
+
+#  define OD_ALWAYS_TRUE(_cond) OD_ASSERT(_cond)
+
+# else
+#  define OD_ASSERT(_cond)
+#  define OD_ASSERT2(_cond, _message)
+#  define OD_ALWAYS_TRUE(_cond) ((void)(_cond))
+# endif
+
+# if !defined(M_PI)
+#  define M_PI      (3.1415926535897932384626433832795)
+# endif
+
+# if !defined(M_SQRT2)
+#  define M_SQRT2 (1.41421356237309504880168872420970)
+# endif
+
+# if !defined(M_SQRT1_2)
+#  define M_SQRT1_2 (0.70710678118654752440084436210485)
+# endif
+
+# if !defined(M_LOG2E)
+#  define M_LOG2E (1.4426950408889634073599246810019)
+# endif
+
+/*Some specific platforms may have optimized intrinsic or inline assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Note that we do not provide a macro for abs(), because it is provided as a
+   library function, which we assume is translated into an intrinsic to avoid
+   the function call overhead and then implemented in the smartest way for the
+   target platform.
+  With modern gcc (4.x), this is true: it uses cmov instructions if the
+   architecture supports it and branchless bit-twiddling if it does not (the
+   speed difference between the two approaches is not measurable).
+  Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
+   by Sun Microsystems, despite prior art dating back to at least 1996:
+   http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
+  On gcc 3.x, however, our assumption is not true, as abs() is translated to a
+   conditional jump, which is horrible on deeply pipelined architectures (e.g.,
+   all consumer architectures for the past decade or more).
+  Also be warned that -C*abs(x) where C is a constant is mis-optimized as
+   abs(C*x) on every gcc release before 4.2.3.
+  See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both forms to the same assembly
+   instructions, because of the way they represented ((b) > (a)) internally.*/
+/*#define OD_MAXI(a, b) ((a) < (b) ? (b) : (a))*/
+# define OD_MAXI(a, b) ((a) ^ (((a) ^ (b)) & -((b) > (a))))
+/*#define OD_MINI(a, b) ((a) > (b) ? (b) : (a))*/
+# define OD_MINI(a, b) ((a) ^ (((b) ^ (a)) & -((b) < (a))))
+/*This has a chance of compiling branchless, and is just as fast as the
+   bit-twiddling method, which is slightly less portable, since it relies on a
+   sign-extended rightshift, which is not guaranteed by ANSI (but present on
+   every relevant platform).*/
+# define OD_SIGNI(a) (((a) > 0) - ((a) < 0))
+/*Slightly more portable than relying on a sign-extended right-shift (which is
+   not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
+   compile it into the right-shift anyway.*/
+# define OD_SIGNMASK(a) (-((a) < 0))
+/*Unlike copysign(), simply inverts the sign of a if b is negative.*/
+# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b))
+# define OD_COPYSIGNI(a, b) OD_FLIPSIGNI(abs(a), b)
+/*Clamps an integer into the given range.
+  If a > c, then the lower bound a is respected over the upper bound c (this
+   behavior is required to meet our documented API behavior).
+  a: The lower bound.
+  b: The value to clamp.
+  c: The upper bound.*/
+# define OD_CLAMPI(a, b, c) (OD_MAXI(a, OD_MINI(b, c)))
+/*Clamps a signed integer between 0 and 255, returning an unsigned char.
+  This assumes a char is 8 bits.*/
+# define OD_CLAMP255(x) \
+  ((unsigned char)((((x) < 0) - 1) & ((x) | -((x) > 255))))
+/*Divides a signed integer by a positive value with exact rounding.*/
+# define OD_DIV_ROUND(x, y) (((x) + OD_FLIPSIGNI((y) >> 1, x))/(y))
+# define OD_DIV_R0(x, y) (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1, (x)))/(y))
+# define OD_DIV_RE(x, y) \
+  (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1 + ((x)/(y) & 1), (x)))/(y))
+/*Divides an integer by a power of two, truncating towards 0.
+  dividend: The integer to divide.
+  shift: The non-negative power of two to divide by.
+  rmask: (1 << shift) - 1*/
+# define OD_DIV_POW2(dividend, shift, rmask) \
+  (((dividend) + (OD_SIGNMASK(dividend) & (rmask))) >> (shift))
+/*Divides x by 65536, truncating towards 0.*/
+# define OD_DIV2_16(x) OD_DIV_POW2(x, 16, 0xFFFF)
+/*Divides x by 2, truncating towards 0.*/
+# define OD_DIV2(x) OD_DIV_POW2(x, 1, 0x1)
+/*Divides x by 8, truncating towards 0.*/
+# define OD_DIV8(x) OD_DIV_POW2(x, 3, 0x7)
+/*Divides x by 16, truncating towards 0.*/
+# define OD_DIV16(x) OD_DIV_POW2(x, 4, 0xF)
+/*Right shifts dividend by shift, adding rval, and subtracting one for
+   negative dividends first.
+  When rval is (1 << (shift - 1)), this is equivalent to division with rounding
+   ties away from zero.*/
+# define OD_DIV_ROUND_POW2(dividend, shift, rval) \
+  (((dividend) + OD_SIGNMASK(dividend) + (rval)) >> (shift))
+/*Divides x by 2, rounding towards even numbers.*/
+# define OD_DIV2_RE(x) ((x) + ((x) >> 1 & 1) >> 1)
+/*Divides x by (1 << (shift)), rounding towards even numbers.*/
+# define OD_DIV_POW2_RE(x, shift) \
+  ((x) + (((1 << (shift)) + ((x) >> (shift) & 1) - 1) >> 1) >> (shift))
+/*Count leading zeros.
+  This macro should only be used for implementing od_ilog(), if it is defined.
+  All other code should use OD_ILOG() instead.*/
+# if defined(_MSC_VER)
+#  include <intrin.h>
+#  if !defined(snprintf)
+#   define snprintf _snprintf
+#  endif
+/*In _DEBUG mode this is not an intrinsic by default.*/
+#  pragma intrinsic(_BitScanReverse)
+
+static __inline int od_bsr(unsigned long x) { /*Index of the highest set bit.*/
+  unsigned long ret;
+  _BitScanReverse(&ret, x); /*Status ignored; presumably needs x != 0 (TODO).*/
+  return (int)ret;
+}
+#  define OD_CLZ0 (1)
+#  define OD_CLZ(x) (-od_bsr(x))
+# elif defined(ENABLE_TI_DSPLIB)
+#  include "dsplib.h"
+#  define OD_CLZ0 (31)
+#  define OD_CLZ(x) (_lnorm(x))
+# elif OD_GNUC_PREREQ(3, 4, 0)
+#  if INT_MAX >= 2147483647
+#   define OD_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT)
+#   define OD_CLZ(x) (__builtin_clz(x))
+#  elif LONG_MAX >= 2147483647L
+#   define OD_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT)
+#   define OD_CLZ(x) (__builtin_clzl(x))
+#  endif
+# endif
+# if defined(OD_CLZ)
+#  define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x))
+/*Note that __builtin_clz is not defined when x == 0, according to the gcc
+   documentation (and that of the x86 BSR instruction that implements it), so
+   we have to special-case it.
+  We define a special version of the macro to use when x can be zero.*/
+#  define OD_ILOG(x) (OD_ILOG_NZ(x) & -!!(x))
+# else
+#  define OD_ILOG_NZ(x) (1 + log2i(x))
+#  define OD_ILOG(x) (1 + log2i(x))
+# endif
+
+# define OD_LOG2(x) (M_LOG2E*log(x))
+
+/*Swaps two integers a and b if a > b.*/
+/*#define OD_SORT2I(a, b) \
+  if ((a) > (b)) { \
+    int t__; \
+    t__ = (a); \
+    (a) = (b); \
+    (b) = t__; \
+  }*/
+/*This branchless version is significantly faster than the above
+   straightforward implementation on modern processors.*/
+# define OD_SORT2I(a, b) \
+  do { \
+    int t__; \
+    t__ = ((a) ^ (b)) & -((b) < (a)); \
+    (a) ^= t__; \
+    (b) ^= t__; \
+  } \
+  while (0)
+
+/*All of these macros should expect floats as arguments.*/
+/*These two should compile as a single SSE instruction.*/
+# define OD_MINF(a, b) ((a) < (b) ? (a) : (b))
+# define OD_MAXF(a, b) ((a) > (b) ? (a) : (b))
+# define OD_CLAMPF(a, b, c) (OD_MAXF(a, OD_MINF(b, c)))
+# if defined(__GNUC__)
+#  define OD_FABSF(f) (fabsf(f))
+#  define OD_SQRTF(f) (sqrtf(f))
+#  define OD_POWF(b, e) (powf(b, e))
+#  define OD_LOGF(f) (logf(f))
+#  define OD_IFLOORF(f) (floorf(f))
+#  define OD_ICEILF(f) (ceilf(f))
+# else
+#  define OD_FABSF(f) ((float)fabs(f))
+#  define OD_SQRTF(f) ((float)sqrt(f))
+#  define OD_POWF(b, e) ((float)pow(b, e))
+#  define OD_LOGF(f) ((float)log(f))
+#  define OD_IFLOORF(f) ((int)floor(f))
+#  define OD_ICEILF(f)  ((int)ceil(f))
+# endif
+
+/** Copy n elements of memory from src to dst. The 0* term provides
+    compile-time type checking  */
+#if !defined(OVERRIDE_OD_COPY)
+# define OD_COPY(dst, src, n) \
+  (memcpy((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src))))
+#endif
+
+/** Copy n elements of memory from src to dst, allowing overlapping regions.
+    The 0* term provides compile-time type checking */
+#if !defined(OVERRIDE_OD_MOVE)
+# define OD_MOVE(dst, src, n) \
+ (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) ))
+#endif
+
+/** Set n elements of dst to zero */
+#if !defined(OVERRIDE_OD_CLEAR)
+# define OD_CLEAR(dst, n) (memset((dst), 0, sizeof(*(dst))*(n)))
+#endif
+
+/** Linkage will break without this if using a C++ compiler, and will issue
+ * warnings without this for a C compiler*/
+#if defined(__cplusplus)
+# define OD_EXTERN extern
+#else
+# define OD_EXTERN
+#endif
+
+/*Some assembly constructs require aligned operands.
+  The following macros are _only_ intended for structure member declarations.
+  Although they will sometimes work on stack variables, gcc will often silently
+   ignore them.
+  A separate set of macros could be made for manual stack alignment, but we
+   don't actually require it anywhere.*/
+# if defined(OD_X86ASM)||defined(OD_ARMASM)
+#  if defined(__GNUC__)
+#   define OD_ALIGN8(expr) expr __attribute__((aligned(8)))
+#   define OD_ALIGN16(expr) expr __attribute__((aligned(16)))
+#  elif defined(_MSC_VER)
+#   define OD_ALIGN8(expr) __declspec (align(8)) expr
+#   define OD_ALIGN16(expr) __declspec (align(16)) expr
+#  else
+#   error "Alignment macros required for this platform."
+#  endif
+# endif
+
+# if !defined(OD_ALIGN8)
+#  define OD_ALIGN8(expr) expr
+# endif
+# if !defined(OD_ALIGN16)
+#  define OD_ALIGN16(expr) expr
+# endif
+
+#endif
diff --git a/dec/decode_block.c b/dec/decode_block.c
index c658093..a88387b 100644
--- a/dec/decode_block.c
+++ b/dec/decode_block.c
@@ -469,7 +469,7 @@ void decode_block(decoder_info_t *decoder_info,int size,int ypos,int xpos){
 }
 
 
-int decode_super_mode(decoder_info_t *decoder_info, int size, int decode_rectangular_size){
+int decode_super_mode(decoder_info_t *decoder_info, int size, int decode_this_size){
   stream_t *stream = decoder_info->stream;
   block_context_t *block_context = decoder_info->block_context;
 
@@ -484,13 +484,13 @@ int decode_super_mode(decoder_info_t *decoder_info, int size, int decode_rectang
 
   if (frame_type==I_FRAME){
     decoder_info->mode = MODE_INTRA;
-    if (size > MIN_BLOCK_SIZE)
+    if (size > MIN_BLOCK_SIZE && decode_this_size)
       split_flag = getbits(stream, 1);
     else
-      split_flag = 0;
+      split_flag = !decode_this_size;
     return split_flag;
   }
-  if (decode_rectangular_size){
+  if (!decode_this_size){
     split_flag = !getbits(stream,1);
     return split_flag;
   }
@@ -640,7 +640,7 @@ void process_block_dec(decoder_info_t *decoder_info,int size,int yposY,int xposY
   int decode_this_size = (yposY + size <= height) && (xposY + size <= width);
   int decode_rectangular_size = !decode_this_size && frame_type != I_FRAME;
 
-  int bit_start = stream->bitcnt;
+  int bit_start = od_ec_dec_tell(&stream->ec);
 
   int mode = MODE_SKIP;
  
@@ -648,7 +648,7 @@ void process_block_dec(decoder_info_t *decoder_info,int size,int yposY,int xposY
   find_block_contexts(yposY, xposY, height, width, size, decoder_info->deblock_data, &block_context, decoder_info->use_block_contexts);
   decoder_info->block_context = &block_context;
 
-  split_flag = decode_super_mode(decoder_info,size,decode_rectangular_size);  
+  split_flag = decode_super_mode(decoder_info,size,decode_this_size);
   mode = decoder_info->mode;
   
   /* Read delta_qp and set block-level qp */
@@ -658,7 +658,8 @@ void process_block_dec(decoder_info_t *decoder_info,int size,int yposY,int xposY
     decoder_info->frame_info.qpb = decoder_info->frame_info.qp + delta_qp;
   }
 
-  decoder_info->bit_count.super_mode[decoder_info->bit_count.stat_frame_type] += (stream->bitcnt - bit_start);
+  decoder_info->bit_count.super_mode[decoder_info->bit_count.stat_frame_type]
+   += (od_ec_dec_tell(&stream->ec) - bit_start);
 
   if (split_flag){
     int new_size = size/2;
diff --git a/dec/decode_frame.c b/dec/decode_frame.c
index dcff858..9e0ee67 100644
--- a/dec/decode_frame.c
+++ b/dec/decode_frame.c
@@ -52,7 +52,7 @@ void decode_frame(decoder_info_t *decoder_info)
   stream_t *stream = decoder_info->stream;
   memset(decoder_info->deblock_data, 0, ((height/MIN_PB_SIZE) * (width/MIN_PB_SIZE) * sizeof(deblock_data_t)) );
 
-  int bit_start = stream->bitcnt;
+  int bit_start = od_ec_dec_tell(&stream->ec);
 
   decoder_info->frame_info.frame_type = getbits(stream,1);
   int qp = getbits(stream,8);
@@ -92,7 +92,8 @@ void decode_frame(decoder_info_t *decoder_info)
   decoder_info->bit_count.stat_frame_type = decoder_info->frame_info.frame_type;
   if (decoder_info->frame_info.frame_type != I_FRAME && decoder_info->num_reorder_pics > 0 && decoder_info->frame_info.display_frame_num%(decoder_info->num_reorder_pics+1)) 
       decoder_info->bit_count.stat_frame_type = B_FRAME;
-  decoder_info->bit_count.frame_header[decoder_info->bit_count.stat_frame_type] += (stream->bitcnt - bit_start);
+  decoder_info->bit_count.frame_header[decoder_info->bit_count.stat_frame_type] +=
+   (od_ec_dec_tell(&stream->ec) - bit_start);
   decoder_info->bit_count.frame_type[decoder_info->bit_count.stat_frame_type] += 1;
   decoder_info->frame_info.qp = qp;
   decoder_info->frame_info.qpb = qp;
diff --git a/dec/entdec.c b/dec/entdec.c
new file mode 100644
index 0000000..adeab64
--- /dev/null
+++ b/dec/entdec.c
@@ -0,0 +1,566 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "entdec.h"
+
+/*A range decoder.
+  This is an entropy decoder based upon \cite{Mar79}, which is itself a
+   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+  It is very similar to arithmetic encoding, except that encoding is done with
+   digits in any base, instead of with bits, and so it is faster when using
+   larger bases (i.e.: a byte).
+  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+   is the base, longer than the theoretical optimum, but to my knowledge there
+   is no published justification for this claim.
+  This only seems true when using near-infinite precision arithmetic so that
+   the process is carried out with no rounding errors.
+
+  An excellent description of implementation details is available at
+   http://www.arturocampos.com/ac_range.html
+  A recent work \cite{MNW98} which proposes several changes to arithmetic
+   encoding for efficiency actually re-discovers many of the principles
+   behind range encoding, and presents a good theoretical analysis of them.
+
+  End of stream is handled by writing out the smallest number of bits that
+   ensures that the stream will be correctly decoded regardless of the value of
+   any subsequent bits.
+  od_ec_dec_tell() can be used to determine how many bits were needed to decode
+   all the symbols thus far; other data can be packed in the remaining bits of
+   the input buffer.
+  @PHDTHESIS{Pas76,
+    author="Richard Clark Pasco",
+    title="Source coding algorithms for fast data compression",
+    school="Dept. of Electrical Engineering, Stanford University",
+    address="Stanford, CA",
+    month=May,
+    year=1976,
+    URL="http://www.richpasco.org/scaffdc.pdf"
+  }
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video & Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul,
+   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
+  }*/
+
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+#define OD_EC_LOTS_OF_BITS (0x4000)
+
+static void od_ec_dec_refill(od_ec_dec *dec) { /*Tops up dif from bptr.*/
+  int s; /*Left shift at which the next input byte is ORed into dif.*/
+  od_ec_window dif;
+  int16_t cnt;
+  const unsigned char *bptr;
+  const unsigned char *end;
+  dif = dec->dif;
+  cnt = dec->cnt;
+  bptr = dec->bptr;
+  end = dec->end;
+  s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
+  for (; s >= 0 && bptr < end; s -= 8, bptr++) { /*One whole byte per pass.*/
+    OD_ASSERT(s <= OD_EC_WINDOW_SIZE - 8);
+    dif |= (od_ec_window)bptr[0] << s;
+    cnt += 8;
+  }
+  if (bptr >= end) { /*Out of input: report a large fake bit count instead.*/
+    dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt; /*Keeps tell unchanged.*/
+    cnt = OD_EC_LOTS_OF_BITS;
+  }
+  dec->dif = dif;
+  dec->cnt = cnt;
+  dec->bptr = bptr;
+}
+
+/*Takes updated dif and range values, renormalizes them so that
+   32768 <= rng < 65536 (reading more bytes from the stream into dif if
+   necessary), and stores them back in the decoder context.
+  dif: The new value of dif.
+  rng: The new value of the range.
+  ret: The value to return.
+  Return: ret.
+          This allows the compiler to jump to this function via a tail-call.*/
+static int od_ec_dec_normalize(od_ec_dec *dec,
+ od_ec_window dif, unsigned rng, int ret) {
+  int d;
+  OD_ASSERT(rng <= 65535U);
+  d = 16 - OD_ILOG_NZ(rng);
+  dec->cnt -= d;
+  dec->dif = dif << d;
+  dec->rng = rng << d;
+  if (dec->cnt < 0) od_ec_dec_refill(dec);
+  return ret;
+}
+
+/*Initializes the decoder.
+  buf: The input buffer to use.
+  storage: The size of the input buffer, in bytes.*/
+void od_ec_dec_init(od_ec_dec *dec,
+ const unsigned char *buf, uint32_t storage) {
+  dec->buf = buf;
+  dec->eptr = buf + storage;
+  dec->end_window = 0;
+  dec->nend_bits = 0;
+  dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
+  dec->end = buf + storage;
+  dec->bptr = buf;
+  dec->dif = 0;
+  dec->rng = 0x8000;
+  dec->cnt = -15;
+  dec->error = 0;
+  od_ec_dec_refill(dec);
+}
+
+/*Decode a bit that has an fz/ft probability of being a zero.
+  fz: The probability that the bit is zero, scaled by ft.
+  ft: The total probability.
+      This must be at least 16384 and no more than 32768.
+  Return: The value decoded (0 or 1).*/
+int od_ec_decode_bool(od_ec_dec *dec, unsigned fz, unsigned ft) {
+  od_ec_window dif;
+  od_ec_window vw;
+  unsigned r;
+  int s;
+  unsigned v;
+  int ret;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < ft);
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft;
+  ft <<= s;
+  fz <<= s;
+  OD_ASSERT(r - ft < ft);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned d;
+    unsigned e;
+    d = r - ft;
+    e = OD_SUBSATU(2*d, ft);
+    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
+  }
+#else
+  v = fz + OD_MINI(fz, r - ft);
+#endif
+  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+  ret = dif >= vw;
+  if (ret) dif -= vw;
+  r = ret ? r - v : v;
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/*Equivalent to od_ec_decode_bool() with ft == 32768.
+  fz: The probability that the bit is zero, scaled by 32768.
+  Return: The value decoded (0 or 1).*/
+int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz) {
+  od_ec_window dif;
+  od_ec_window vw;
+  unsigned r;
+  unsigned v;
+  int ret;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < 32768U);
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(32768U <= r);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned d;
+    unsigned e;
+    d = r - 32768U;
+    e = OD_SUBSATU(2*d, 32768U);
+    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
+  }
+#else
+  v = fz + OD_MINI(fz, r - 32768U);
+#endif
+  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+  ret = dif >= vw;
+  if (ret) dif -= vw;
+  r = ret ? r - v : v;
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+        must be at least 16384, and no more than 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned d;
+#if OD_EC_REDUCED_OVERHEAD
+  unsigned e;
+#endif
+  int s;
+  unsigned u;
+  unsigned v;
+  unsigned q;
+  unsigned fl;
+  unsigned fh;
+  unsigned ft;
+  int ret;
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(nsyms > 0);
+  ft = cdf[nsyms - 1];
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft;
+  ft <<= s;
+  d = r - ft;
+  OD_ASSERT(d < ft);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
+  q = OD_MAXI((int)(c >> 1), (int)(c - d));
+#if OD_EC_REDUCED_OVERHEAD
+  e = OD_SUBSATU(2*d, ft);
+  /*The correctness of this inverse partition function is not obvious, but it
+     was checked exhaustively for all possible values of r, ft, and c.
+    TODO: It should be possible to optimize this better than the compiler,
+     given that we do not care about the accuracy of negative results (as we
+     will not use them).
+    It would also be nice to get rid of the 32-bit dividend, as it requires a
+     32x32->64 bit multiply to invert.*/
+  q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3));
+#endif
+  q >>= s;
+  OD_ASSERT(q < ft >> s);
+  fl = 0;
+  ret = 0;
+  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh;
+  OD_ASSERT(fh <= ft >> s);
+  fl <<= s;
+  fh <<= s;
+#if OD_EC_REDUCED_OVERHEAD
+  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u;
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+        must be 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned d;
+#if OD_EC_REDUCED_OVERHEAD
+  unsigned e;
+#endif
+  unsigned u;
+  unsigned v;
+  unsigned q;
+  unsigned fl;
+  unsigned fh;
+  int ret;
+  (void)nsyms;
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(nsyms > 0);
+  OD_ASSERT(cdf[nsyms - 1] == 32768U);
+  OD_ASSERT(32768U <= r);
+  d = r - 32768U;
+  OD_ASSERT(d < 32768U);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
+  q = OD_MAXI((int)(c >> 1), (int)(c - d));
+#if OD_EC_REDUCED_OVERHEAD
+  e = OD_SUBSATU(2*d, 32768U);
+  /*TODO: See TODO above.*/
+  q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3));
+#endif
+  OD_ASSERT(q < 32768U);
+  fl = 0;
+  ret = 0;
+  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh;
+  OD_ASSERT(fh <= 32768U);
+#if OD_EC_REDUCED_OVERHEAD
+  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u;
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+       must be at least 2, and no more than 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_unscaled(od_ec_dec *dec,
+ const uint16_t *cdf, int nsyms) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned d;
+#if OD_EC_REDUCED_OVERHEAD
+  unsigned e;
+#endif
+  int s;
+  unsigned u;
+  unsigned v;
+  unsigned q;
+  unsigned fl;
+  unsigned fh;
+  unsigned ft;
+  int ret;
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(nsyms > 0);
+  ft = cdf[nsyms - 1];
+  OD_ASSERT(2 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  s = 15 - OD_ILOG_NZ(ft - 1);
+  ft <<= s;
+  OD_ASSERT(ft <= r);
+  if (r - ft >= ft) {
+    ft <<= 1;
+    s++;
+  }
+  d = r - ft;
+  OD_ASSERT(d < ft);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
+  q = OD_MAXI((int)(c >> 1), (int)(c - d));
+#if OD_EC_REDUCED_OVERHEAD
+  e = OD_SUBSATU(2*d, ft);
+  /*TODO: See TODO above.*/
+  q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3));
+#endif
+  q >>= s;
+  OD_ASSERT(q < ft >> s);
+  fl = 0;
+  ret = 0;
+  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh;
+  OD_ASSERT(fh <= ft >> s);
+  fl <<= s;
+  fh <<= s;
+#if OD_EC_REDUCED_OVERHEAD
+  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u;
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+       must be exactly 1 << ftb.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  ftb: The number of bits of precision in the cumulative distribution.
+       This must be no more than 15.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec,
+ const uint16_t *cdf, int nsyms, unsigned ftb) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned d;
+#if OD_EC_REDUCED_OVERHEAD
+  unsigned e;
+#endif
+  int s;
+  unsigned u;
+  unsigned v;
+  unsigned q;
+  unsigned fl;
+  unsigned fh;
+  int ret;
+  (void)nsyms;
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(ftb <= 15);
+  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
+  s = 15 - ftb;
+  OD_ASSERT(32768U <= r);
+  d = r - 32768U;
+  OD_ASSERT(d < 32768U);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
+  q = OD_MAXI((int)(c >> 1), (int)(c - d));
+#if OD_EC_REDUCED_OVERHEAD
+  e = OD_SUBSATU(2*d, 32768U);
+  /*TODO: See TODO above.*/
+  q = OD_MAXI((int)q, (int)((2*(int32_t)c + 1 - (int32_t)e)/3));
+#endif
+  q >>= s;
+  OD_ASSERT(q < 1U << ftb);
+  fl = 0;
+  ret = 0;
+  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh;
+  OD_ASSERT(fh <= 1U << ftb);
+  fl <<= s;
+  fh <<= s;
+#if OD_EC_REDUCED_OVERHEAD
+  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u;
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The integer must have been encoded with od_ec_enc_uint().
+  ft: The number of integers that can be decoded (one more than the max).
+      This must be at least 2, and no more than 2**29.
+  Return: The decoded integer, or ft - 1 with dec->error set if out of range.*/
+uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) {
+  OD_ASSERT(ft >= 2);
+  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
+  if (ft > 1U << OD_EC_UINT_BITS) {
+    uint32_t t;
+    int ft1;
+    int ftb;
+    ft--;
+    ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
+    ft1 = (int)(ft >> ftb) + 1;
+    t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1);
+    t = t << ftb | od_ec_dec_bits(dec, ftb);
+    if (t <= ft) return t;
+    dec->error = 1;
+    return ft;
+  }
+  return od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft), (int)ft);
+}
+
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with od_ec_enc_bits().
+  ftb: The number of bits to extract.
+       This must be between 0 and 25, inclusive.
+  Return: The decoded bits.*/
+uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb) {
+  od_ec_window window;
+  int available;
+  uint32_t ret;
+  OD_ASSERT(ftb <= 25);
+  window = dec->end_window;
+  available = dec->nend_bits;
+  if ((unsigned)available < ftb) {
+    const unsigned char *buf;
+    const unsigned char *eptr;
+    buf = dec->buf;
+    eptr = dec->eptr;
+    OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8);
+    do {
+      if (eptr <= buf) {
+        dec->tell_offs += OD_EC_LOTS_OF_BITS - available;
+        available = OD_EC_LOTS_OF_BITS;
+        break;
+      }
+      window |= (od_ec_window)*--eptr << available;
+      available += 8;
+    }
+    while (available <= OD_EC_WINDOW_SIZE - 8);
+    dec->eptr = eptr;
+  }
+  ret = (uint32_t)window & (((uint32_t)1 << ftb) - 1);
+  window >>= ftb;
+  available -= ftb;
+  dec->end_window = window;
+  dec->nend_bits = available;
+  return ret;
+}
+
+/*Returns the number of bits "used" by the decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+int od_ec_dec_tell(od_ec_dec *dec) {
+  return ((dec->end - dec->eptr) + (dec->bptr - dec->buf))*8
+   - dec->cnt - dec->nend_bits + dec->tell_offs;
+}
+
+/*Returns the number of bits "used" by the decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits scaled by 2**OD_BITRES.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) {
+  return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng);
+}
diff --git a/dec/entdec.h b/dec/entdec.h
new file mode 100644
index 0000000..bc57eab
--- /dev/null
+++ b/dec/entdec.h
@@ -0,0 +1,91 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_entdec_H)
+# define _entdec_H (1)
+# include <limits.h>
+# include "entcode.h"
+
+typedef struct od_ec_dec od_ec_dec;
+
+/*The entropy decoder context.*/
+struct od_ec_dec {
+  /*The start of the current input buffer.*/
+  const unsigned char *buf;
+  /*The read pointer for the raw bits.*/
+  const unsigned char *eptr;
+  /*Bits that will be read from/written at the end.*/
+  od_ec_window end_window;
+  /*Number of valid bits in end_window.*/
+  int nend_bits;
+  /*An offset used to keep track of tell after reaching the end of the stream.
+    This is constant throughout most of the decoding process, but becomes
+     important once we hit the end of the buffer and stop incrementing pointers
+     (and instead pretend cnt/nend_bits have lots of bits).*/
+  int32_t tell_offs;
+  /*The end of the current input buffer.*/
+  const unsigned char *end;
+  /*The read pointer for the entropy-coded bits.*/
+  const unsigned char *bptr;
+  /*The difference between the coded value and the low end of the current
+     range.*/
+  od_ec_window dif;
+  /*The number of values in the current range.*/
+  uint16_t rng;
+  /*The number of bits of data in the current value.*/
+  int16_t cnt;
+  /*Nonzero if an error occurred.*/
+  int error;
+};
+
+/*See entdec.c for further documentation.*/
+
+void od_ec_dec_init(od_ec_dec *dec,
+ const unsigned char *buf, uint32_t storage)
+ OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+
+OD_WARN_UNUSED_RESULT int od_ec_decode_bool(od_ec_dec *dec, unsigned fz,
+ unsigned ft) OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz)
+ OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf(od_ec_dec *dec,
+ const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_q15(od_ec_dec *dec,
+ const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled(od_ec_dec *dec,
+ const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec,
+ const uint16_t *cdf, int nsyms, unsigned ftb)
+ OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec,
+ uint32_t ft) OD_ARG_NONNULL(1);
+
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits(od_ec_dec *dec,
+ unsigned ftb) OD_ARG_NONNULL(1);
+
+OD_WARN_UNUSED_RESULT int od_ec_dec_tell(od_ec_dec *dec) OD_ARG_NONNULL(1);
+uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) OD_ARG_NONNULL(1);
+
+#endif
diff --git a/dec/getbits.c b/dec/getbits.c
index 34bd0c3..104b3f0 100644
--- a/dec/getbits.c
+++ b/dec/getbits.c
@@ -24,123 +24,80 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "global.h"
 #include "getbits.h"
 
-
-/* to mask the n least significant bits of an integer */
-static const unsigned int msk[33] =
-{
-  0x00000000,0x00000001,0x00000003,0x00000007,
-  0x0000000f,0x0000001f,0x0000003f,0x0000007f,
-  0x000000ff,0x000001ff,0x000003ff,0x000007ff,
-  0x00000fff,0x00001fff,0x00003fff,0x00007fff,
-  0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff,
-  0x000fffff,0x001fffff,0x003fffff,0x007fffff,
-  0x00ffffff,0x01ffffff,0x03ffffff,0x07ffffff,
-  0x0fffffff,0x1fffffff,0x3fffffff,0x7fffffff,
-  0xffffffff
-};
-
 int initbits_dec(FILE *infile, stream_t *str)
 {
-  fpos_t fpos[1];
-  long pos1,pos2;
-
-  str->incnt = 0;
-  str->rdptr = str->rdbfr + 2048;
-  str->bitcnt = 0;
+  /*Reads one frame: a 4-byte big-endian length, then that many bytes.
+    Returns 0 on success, nonzero on EOF, read error or failed allocation.*/
+  uint8_t frame_bytes_buf[4];
   str->infile = infile;
-
-  fgetpos(str->infile,fpos);
-  pos1 = ftell(str->infile);
-  fseek(str->infile,0,SEEK_END);
-  pos2 = ftell(str->infile);
-  fsetpos(str->infile,fpos);
-  str->length = pos2 - pos1;
-  return 0;
-}
-
-int fillbfr(stream_t *str)
-{
-    //int l;
-
-  while (str->incnt <= 24 && (str->rdptr < str->rdbfr + 2048))
-  {
-    str->inbfr = (str->inbfr << 8) | *str->rdptr++;
-    str->incnt += 8;
-  }
-
-  if (str->rdptr >= str->rdbfr + 2048)
-  {
-    //l = (int)fread(str->rdbfr,sizeof(unsigned char),2048,str->infile);
-    fread(str->rdbfr,sizeof(unsigned char),2048,str->infile);
-    str->rdptr = str->rdbfr;
-
-    while (str->incnt <= 24 && (str->rdptr < str->rdbfr + 2048))
-    {
-      str->inbfr = (str->inbfr << 8) | *str->rdptr++;
-      str->incnt += 8;
-    }
-  }
-
-  return 0;
-}
-
-unsigned int getbits(stream_t *str, int n)
-{
-
-  if (str->incnt < n)
+  uint32_t length;
+  int ret = fread(frame_bytes_buf, sizeof(frame_bytes_buf), 1, infile) != 1;
+  if (!ret)
   {
-    fillbfr(str);
-    if (str->incnt < n)
+    unsigned char *buf = NULL; /*Stays NULL for an empty (invalid) frame.*/
+    length = (uint32_t)frame_bytes_buf[0] << 24 | frame_bytes_buf[1] << 16
+     | frame_bytes_buf[2] << 8 | frame_bytes_buf[3];
+    if (length > 0) buf = realloc(str->buf, sizeof(*buf)*length);
+    ret = buf == NULL; /*On failure str->buf is untouched and still valid.*/
+    if (!ret)
     {
-      unsigned int l = str->inbfr;
-      unsigned int k = *str->rdptr++;
-      int shift = n-str->incnt;
-      str->inbfr = (str->inbfr << 8) | k;
-      str->incnt = str->incnt - n + 8;
-      str->bitcnt += n;
-      return (((l << shift) | (k >> (8-shift))) & msk[n]);
+      str->buf = buf; /*Keep ownership: the next call realloc()s this.*/
+      ret = fread(buf, sizeof(*buf), length, str->infile) != length;
+      if (!ret) {
+        od_ec_dec_init(&str->ec, buf, length);
+      }
     }
   }
-
-  str->incnt -= n;
-  str->bitcnt += n;
-  return ((str->inbfr >> str->incnt) & msk[n]);
+  return ret;
 }
 
-unsigned int getbits1(stream_t *str)
-{
-  if (str->incnt < 1)
-  {
-    fillbfr(str);
-  }
-  str->incnt--;
-  str->bitcnt++;
-  return ((str->inbfr >> str->incnt) & 1);
-}
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+#define OD_EC_LOTS_OF_BITS (0x4000)
 
 unsigned int showbits(stream_t *str, int n)
 {
-  if (str->incnt < n)
-  {
-    fillbfr(str);
-    if (str->incnt < n)
-    {
-      int shift = n-str->incnt;
-      return (((str->inbfr << shift) | (str->rdptr[0] >> (8-shift))) & msk[n]);
+  od_ec_window window; /*Peeks at the next n raw bits without consuming.*/
+  int available; /*Number of valid bits currently held in window.*/
+  uint32_t ret; /*The n lowest window bits; they are not removed here.*/
+  OD_ASSERT(n <= 25);
+  window = str->ec.end_window;
+  available = str->ec.nend_bits;
+  if ((unsigned)available < n) { /*Too few buffered bits: load more bytes.*/
+    const unsigned char *buf;
+    const unsigned char *eptr;
+    buf = str->ec.buf;
+    eptr = str->ec.eptr;
+    OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8);
+    do {
+      if (eptr <= buf) { /*Reached the start of the buffer: nothing left.*/
+        str->ec.tell_offs += OD_EC_LOTS_OF_BITS - available;
+        available = OD_EC_LOTS_OF_BITS;
+        break;
+      }
+      window |= (od_ec_window)*--eptr << available; /*Reads backwards.*/
+      available += 8;
     }
+    while (available <= OD_EC_WINDOW_SIZE - 8);
+    str->ec.eptr = eptr;
+    str->ec.end_window = window;
+    str->ec.nend_bits = available;
   }
-
-  return ((str->inbfr >> (str->incnt-n)) & msk[n]);
+  ret = (uint32_t)window & (((uint32_t)1 << n) - 1);
+  OD_ASSERT(n > 0);
+  return bitreverse(ret << (32 - n)); /*Bit 0 of ret becomes bit n - 1.*/
 }
 
-int flushbits(stream_t *str, int n)
+void flushbits(stream_t *str, int n)
 {
-  str->incnt -= n;
-  str->bitcnt += n;
-  return 0;
+  OD_ASSERT(str->ec.nend_bits >= n); /*The bits must already be buffered.*/
+  str->ec.end_window >>= n; /*Discard the n lowest (oldest) raw bits.*/
+  str->ec.nend_bits -= n;
 }
diff --git a/dec/getbits.h b/dec/getbits.h
index 1377b11..18f98ea 100644
--- a/dec/getbits.h
+++ b/dec/getbits.h
@@ -28,23 +28,38 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define _GETBITS_H_
 
 #include <stdio.h>
+#include "entdec.h"
 
 typedef struct
 {
+  od_ec_dec ec;
   FILE *infile;
-  unsigned char rdbfr[2051];
-  unsigned char *rdptr;
-  unsigned int inbfr;
-  int incnt;
-  int bitcnt;
-  int length;
+  unsigned char *buf;
 } stream_t;
 
 int initbits_dec(FILE *infile, stream_t *str);
-int fillbfr(stream_t *str);
+
+static inline unsigned bitreverse(unsigned val)
+{ /*Reverses bits 0..31 of val by swapping ever smaller groups of bits.*/
+  val = ((val >> 16) & 0x0000FFFFU) | ((val <<16) & 0xFFFF0000U);
+  val = ((val >> 8) & 0x00FF00FFU) | ((val << 8) & 0xFF00FF00U);
+  val = ((val >> 4) & 0x0F0F0F0FU) | ((val << 4) & 0xF0F0F0F0U);
+  val = ((val >> 2) & 0x33333333U) | ((val << 2) & 0xCCCCCCCCU);
+  return ((val >> 1) & 0x55555555U) | ((val << 1) & 0xAAAAAAAAUL);
+}
+
 unsigned int showbits(stream_t *str, int n);
-unsigned int getbits1(stream_t *str);
-int flushbits(stream_t *str, int n);
-unsigned int getbits(stream_t *str, int n);
+
+static inline unsigned int getbits1(stream_t *str)
+{ /*Reads a single raw bit via od_ec_dec_bits().*/
+  return od_ec_dec_bits(&str->ec, 1);
+}
+
+void flushbits(stream_t *str, int n);
+
+static inline unsigned int getbits(stream_t *str, int n)
+{ /*Reads n raw bits, first bit read most significant; 0 when n <= 0.*/
+  return n > 0 ? bitreverse(od_ec_dec_bits(&str->ec, n) << (32 - n)) : 0;
+}
 
 #endif
diff --git a/dec/maindec.c b/dec/maindec.c
index 6a5dc0d..6fe39c3 100644
--- a/dec/maindec.c
+++ b/dec/maindec.c
@@ -126,6 +126,7 @@ int main(int argc, char** argv)
     int decode_frame_num = 0;
     int frame_count = 0;
     int last_frame_output = -1;
+    int done = 0;
     int width;
     int height;
     int r;
@@ -139,13 +140,14 @@ int main(int argc, char** argv)
 	  int input_file_size = ftell(infile);
 	  fseek(infile, 0, SEEK_SET);
     
+    memset(&stream, 0, sizeof(stream));
     initbits_dec(infile, &stream);
 
     decoder_info.stream = &stream;
 
     memset(&decoder_info.bit_count,0,sizeof(bit_count_t));
 
-    int bit_start = stream.bitcnt;
+    int bit_start = od_ec_dec_tell(&stream.ec);
     /* Read sequence header */
     width = getbits(&stream,16);
     height = getbits(&stream,16);
@@ -176,7 +178,8 @@ int main(int argc, char** argv)
     decoder_info.use_block_contexts = getbits(&stream,1);
     decoder_info.bipred = getbits(&stream,1);
 
-    decoder_info.bit_count.sequence_header += (stream.bitcnt - bit_start);
+    decoder_info.bit_count.sequence_header +=
+     (od_ec_dec_tell(&stream.ec) - bit_start);
 
     for (r=0;r<MAX_REORDER_BUFFER;r++){
       create_yuv_frame(&rec[r],width,height,0,0,0,0);
@@ -194,7 +197,7 @@ int main(int argc, char** argv)
 
     decoder_info.deblock_data = (deblock_data_t *)malloc((height/MIN_PB_SIZE) * (width/MIN_PB_SIZE) * sizeof(deblock_data_t));
 
-    while (stream.bitcnt < 8*input_file_size - 8)
+    do
     {
       decoder_info.frame_info.decode_order_frame_num = decode_frame_num;
       decoder_info.frame_info.display_frame_num = (frame_count/sub_gop)*sub_gop+reorder_frame_offset(frame_count % sub_gop, sub_gop,decoder_info.dyadic_coding);
@@ -203,6 +206,7 @@ int main(int argc, char** argv)
         decoder_info.rec = &rec[rec_buffer_idx];
         decoder_info.rec->frame_num = decoder_info.frame_info.display_frame_num;
         decode_frame(&decoder_info);
+        done = initbits_dec(infile, &stream);
         rec_available[rec_buffer_idx]=1;
 
         rec_buffer_idx = (last_frame_output+1)%MAX_REORDER_BUFFER;
@@ -212,11 +216,12 @@ int main(int argc, char** argv)
           rec_available[rec_buffer_idx] = 0;
         }
         printf("decode_frame_num=%4d display_frame_num=%4d input_file_size=%12d bitcnt=%12d\n",
-            decode_frame_num,decoder_info.frame_info.display_frame_num,input_file_size,stream.bitcnt);
+            decode_frame_num,decoder_info.frame_info.display_frame_num,input_file_size,od_ec_dec_tell(&stream.ec));
         decode_frame_num++;
       }
       frame_count++;
     }
+    while (!done);
     // Output the tail
     int i,j;
     for (i=1; i<=MAX_REORDER_BUFFER; ++i) {
diff --git a/dec/read_bits.c b/dec/read_bits.c
index db18d9a..ad73578 100644
--- a/dec/read_bits.c
+++ b/dec/read_bits.c
@@ -253,13 +253,12 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
   int16_t *coeff_v = block_info->coeffq_v;
 
   zerovec.y = zerovec.x = 0;
-  bit_start = stream->bitcnt;
 
   mode = decoder_info->mode;
   int coeff_block_type = (mode == MODE_INTRA)<<1;
 
   /* Initialize bit counter for statistical purposes */
-  bit_start = stream->bitcnt;
+  bit_start = od_ec_dec_tell(&stream->ec);
 
   if (mode == MODE_SKIP){
     /* Derive skip vector candidates and number of skip vector candidates from neighbour blocks */
@@ -291,7 +290,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
     }
     else
       skip_idx = 0;
-    decoder_info->bit_count.skip_idx[stat_frame_type] += (stream->bitcnt - bit_start);
+    decoder_info->bit_count.skip_idx[stat_frame_type] +=
+     (od_ec_dec_tell(&stream->ec) - bit_start);
 
     block_info->num_skip_vec = num_skip_vec;
     block_info->pred_data.skip_idx = skip_idx;
@@ -352,7 +352,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
     }
     else
       skip_idx = 0;
-    decoder_info->bit_count.skip_idx[stat_frame_type] += (stream->bitcnt - bit_start);
+    decoder_info->bit_count.skip_idx[stat_frame_type] +=
+     (od_ec_dec_tell(&stream->ec) - bit_start);
 
     block_info->num_skip_vec = num_skip_vec;
     block_info->pred_data.skip_idx = skip_idx;
@@ -450,7 +451,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
       read_mv(stream,&mv_arr[2],&mvp2);
       read_mv(stream,&mv_arr[3],&mvp2);
     }
-    decoder_info->bit_count.mv[stat_frame_type] += (stream->bitcnt - bit_start);
+    decoder_info->bit_count.mv[stat_frame_type] +=
+     (od_ec_dec_tell(&stream->ec) - bit_start);
     block_info->pred_data.ref_idx0 = ref_idx;
     block_info->pred_data.ref_idx1 = ref_idx;
     block_info->pred_data.dir = 0;
@@ -554,7 +556,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
     block_info->pred_data.dir = 2;
     int combined_ref = block_info->pred_data.ref_idx0 * decoder_info->frame_info.num_ref + block_info->pred_data.ref_idx1;
     decoder_info->bit_count.bi_ref[stat_frame_type][combined_ref] += 1;
-    decoder_info->bit_count.mv[stat_frame_type] += (stream->bitcnt - bit_start);
+    decoder_info->bit_count.mv[stat_frame_type] +=
+     (od_ec_dec_tell(&stream->ec) - bit_start);
   }
 
   else if (mode==MODE_INTRA){
@@ -606,7 +609,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
       intra_mode = intra_mode_map_inv[code];
     }
 
-    decoder_info->bit_count.intra_mode[stat_frame_type] += (stream->bitcnt - bit_start);
+    decoder_info->bit_count.intra_mode[stat_frame_type] +=
+     (od_ec_dec_tell(&stream->ec) - bit_start);
     decoder_info->bit_count.size_and_intra_mode[stat_frame_type][log2i(size)-3][intra_mode] += 1;
 
     block_info->pred_data.intra_mode = intra_mode;
@@ -623,7 +627,7 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
     int tmp,cbp2;
     int cbp_table[8] = {1,0,5,2,6,3,7,4};
 
-    bit_start = stream->bitcnt;
+    bit_start = od_ec_dec_tell(&stream->ec);
     code = get_vlc(0,stream);
 
     if (decoder_info->tb_split_enable && (mode==MODE_INTRA || (mode==MODE_INTER && PBpart==0))){
@@ -636,7 +640,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
       tb_split = 0;
     }
     block_info->tb_split = tb_split;
-    decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start);
+    decoder_info->bit_count.cbp[stat_frame_type] +=
+     (od_ec_dec_tell(&stream->ec) - bit_start);
 
     if (tb_split == 0){
       tmp = 0;
@@ -670,25 +675,28 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
       block_info->cbp = cbp;
 
       if (cbp.y){
-        bit_start = stream->bitcnt;
+        bit_start = od_ec_dec_tell(&stream->ec);
         read_coeff(stream,coeff_y,sizeY,coeff_block_type|0);
-        decoder_info->bit_count.coeff_y[stat_frame_type] += (stream->bitcnt - bit_start);
+        decoder_info->bit_count.coeff_y[stat_frame_type] +=
+         (od_ec_dec_tell(&stream->ec) - bit_start);
       }
       else
         memset(coeff_y,0,sizeY*sizeY*sizeof(int16_t));
 
       if (cbp.u){
-        bit_start = stream->bitcnt;
+        bit_start = od_ec_dec_tell(&stream->ec);
         read_coeff(stream,coeff_u,sizeC,coeff_block_type|1);
-        decoder_info->bit_count.coeff_u[stat_frame_type] += (stream->bitcnt - bit_start);
+        decoder_info->bit_count.coeff_u[stat_frame_type] +=
+         (od_ec_dec_tell(&stream->ec) - bit_start);
       }
       else
         memset(coeff_u,0,sizeC*sizeC*sizeof(int16_t));
 
       if (cbp.v){
-        bit_start = stream->bitcnt;
+        bit_start = od_ec_dec_tell(&stream->ec);
         read_coeff(stream,coeff_v,size/2,coeff_block_type|1);
-        decoder_info->bit_count.coeff_v[stat_frame_type] += (stream->bitcnt - bit_start);
+        decoder_info->bit_count.coeff_v[stat_frame_type] +=
+         (od_ec_dec_tell(&stream->ec) - bit_start);
       }
       else
         memset(coeff_v,0,sizeC*sizeC*sizeof(int16_t));
@@ -700,7 +708,7 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
 
         /* Loop over 4 TUs */
         for (index=0;index<4;index++){
-          bit_start = stream->bitcnt;
+          bit_start = od_ec_dec_tell(&stream->ec);
           code = get_vlc(0,stream);
           int tmp = 0;
           while (code != cbp_table[tmp] && tmp < 8) tmp++;
@@ -711,7 +719,8 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
           cbp.v = ((tmp>>2)&1);
 
           /* Updating statistics for CBP */
-          decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start);
+          decoder_info->bit_count.cbp[stat_frame_type] +=
+           (od_ec_dec_tell(&stream->ec) - bit_start);
           decoder_info->bit_count.cbp_stat[stat_frame_type][cbp.y + (cbp.u<<1) + (cbp.v<<2)] += 1;
 
           /* Decode coefficients for this TU */
@@ -719,9 +728,10 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
           /* Y */
           coeff = coeff_y + index*sizeY/2*sizeY/2;
           if (cbp.y){
-            bit_start = stream->bitcnt;
+            bit_start = od_ec_dec_tell(&stream->ec);
             read_coeff(stream,coeff,sizeY/2,coeff_block_type|0);
-            decoder_info->bit_count.coeff_y[stat_frame_type] += (stream->bitcnt - bit_start);
+            decoder_info->bit_count.coeff_y[stat_frame_type] +=
+             (od_ec_dec_tell(&stream->ec) - bit_start);
           }
           else{
             memset(coeff,0,sizeY/2*sizeY/2*sizeof(int16_t));
@@ -730,9 +740,10 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
           /* U */
           coeff = coeff_u + index*sizeC/2*sizeC/2;
           if (cbp.u){
-            bit_start = stream->bitcnt;
+            bit_start = od_ec_dec_tell(&stream->ec);
             read_coeff(stream,coeff,sizeC/2,coeff_block_type|1);
-            decoder_info->bit_count.coeff_u[stat_frame_type] += (stream->bitcnt - bit_start);
+            decoder_info->bit_count.coeff_u[stat_frame_type] +=
+             (od_ec_dec_tell(&stream->ec) - bit_start);
           }
           else{
             memset(coeff,0,sizeC/2*sizeC/2*sizeof(int16_t));
@@ -741,9 +752,10 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
           /* V */
           coeff = coeff_v + index*sizeC/2*sizeC/2;
           if (cbp.v){
-            bit_start = stream->bitcnt;
+            bit_start = od_ec_dec_tell(&stream->ec);
             read_coeff(stream,coeff,sizeC/2,coeff_block_type|1);
-            decoder_info->bit_count.coeff_v[stat_frame_type] += (stream->bitcnt - bit_start);
+            decoder_info->bit_count.coeff_v[stat_frame_type] +=
+             (od_ec_dec_tell(&stream->ec) - bit_start);
           }
           else{
             memset(coeff,0,sizeC/2*sizeC/2*sizeof(int16_t));
@@ -759,23 +771,25 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
 
         /* Loop over 4 TUs */
         for (index=0;index<4;index++){
-          bit_start = stream->bitcnt;
+          bit_start = od_ec_dec_tell(&stream->ec);
           cbp.y = getbits(stream,1);
-          decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start);
+          decoder_info->bit_count.cbp[stat_frame_type] +=
+           (od_ec_dec_tell(&stream->ec) - bit_start);
 
           /* Y */
           coeff = coeff_y + index*sizeY/2*sizeY/2;
           if (cbp.y){
-            bit_start = stream->bitcnt;
+            bit_start = od_ec_dec_tell(&stream->ec);
             read_coeff(stream,coeff,sizeY/2,coeff_block_type|0);
-            decoder_info->bit_count.coeff_y[stat_frame_type] += (stream->bitcnt - bit_start);
+            decoder_info->bit_count.coeff_y[stat_frame_type] +=
+             (od_ec_dec_tell(&stream->ec) - bit_start);
           }
           else{
             memset(coeff,0,sizeY/2*sizeY/2*sizeof(int16_t));
           }
         }
 
-        bit_start = stream->bitcnt;
+        bit_start = od_ec_dec_tell(&stream->ec);
         int tmp;
         tmp = getbits(stream,1);
         if (tmp){
@@ -799,18 +813,21 @@ int read_block(decoder_info_t *decoder_info,stream_t *stream,block_info_dec_t *b
             }
           }
         }
-        decoder_info->bit_count.cbp[stat_frame_type] += (stream->bitcnt - bit_start);
+        decoder_info->bit_count.cbp[stat_frame_type] +=
+         (od_ec_dec_tell(&stream->ec) - bit_start);
         if (cbp.u){
-          bit_start = stream->bitcnt;
+          bit_start = od_ec_dec_tell(&stream->ec);
           read_coeff(stream,coeff_u,sizeC,coeff_block_type|1);
-          decoder_info->bit_count.coeff_u[stat_frame_type] += (stream->bitcnt - bit_start);
+          decoder_info->bit_count.coeff_u[stat_frame_type] +=
+           (od_ec_dec_tell(&stream->ec) - bit_start);
         }
         else
           memset(coeff_u,0,sizeC*sizeC*sizeof(int16_t));
         if (cbp.v){
-          bit_start = stream->bitcnt;
+          bit_start = od_ec_dec_tell(&stream->ec);
           read_coeff(stream,coeff_v,size/2,coeff_block_type|1);
-          decoder_info->bit_count.coeff_v[stat_frame_type] += (stream->bitcnt - bit_start);
+          decoder_info->bit_count.coeff_v[stat_frame_type] +=
+           (od_ec_dec_tell(&stream->ec) - bit_start);
         }
         else
           memset(coeff_v,0,sizeC*sizeC*sizeof(int16_t));
diff --git a/enc/encode_block.c b/enc/encode_block.c
index dba8fc2..dc8c693 100644
--- a/enc/encode_block.c
+++ b/enc/encode_block.c
@@ -2942,6 +2942,9 @@ int process_block(encoder_info_t *encoder_info,int size,int ypos,int xpos,int qp
       write_data.size = size;
       write_data.block_context = block_info.block_context;
       write_data.frame_type = frame_type;
+      // We can't use encode_rectangular_size here directly, because it is
+      // never true for I frames. !encode_this_size is what we actually want.
+      write_data.encode_rectangular_size = !encode_this_size;
       write_super_mode(stream, &write_data, split_flag);
     }
     else{
diff --git a/enc/entenc.c b/enc/entenc.c
new file mode 100644
index 0000000..f83250d
--- /dev/null
+++ b/enc/entenc.c
@@ -0,0 +1,706 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include "entenc.h"
+
+/*A range encoder.
+  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
+
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video \& Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul,
+   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
+  }*/
+
+/*Takes updated low and range values, renormalizes them so that
+   32768 <= rng < 65536 (flushing bytes from low to the pre-carry buffer if
+   necessary), and stores them back in the encoder context.
+  low: The new value of low.
+  rng: The new value of the range.*/
+static void od_ec_enc_normalize(od_ec_enc *enc,
+ od_ec_window low, unsigned rng) {
+  int d; /*Left shift applied to low and rng below.*/
+  int c;
+  int s; /*Value stored back into enc->cnt.*/
+  c = enc->cnt;
+  OD_ASSERT(rng <= 65535U);
+  d = 16 - OD_ILOG_NZ(rng);
+  s = c + d;
+  /*TODO: Right now we flush every time we have at least one byte available.
+    Instead we should use an od_ec_window and flush right before we're about to
+     shift bits off the end of the window.
+    For a 32-bit window this is about the same amount of work, but for a 64-bit
+     window it should be a fair win.*/
+  if (s >= 0) {
+    uint16_t *buf;
+    uint32_t storage;
+    uint32_t offs;
+    unsigned m;
+    buf = enc->precarry_buf;
+    storage = enc->precarry_storage;
+    offs = enc->offs;
+    if (offs + 2 > storage) { /*At most two entries are written below.*/
+      storage = 2*storage + 2;
+      buf = (uint16_t *)realloc(buf, sizeof(*buf)*storage);
+      if (buf == NULL) { /*enc->precarry_buf still holds the old block.*/
+        enc->error = -1;
+        enc->offs = 0;
+        return;
+      }
+      enc->precarry_buf = buf;
+      enc->precarry_storage = storage;
+    }
+    c += 16;
+    m = (1 << c) - 1;
+    if (s >= 8) {
+      OD_ASSERT(offs < storage);
+      buf[offs++] = (uint16_t)(low >> c);
+      low &= m;
+      c -= 8;
+      m >>= 8;
+    }
+    OD_ASSERT(offs < storage);
+    buf[offs++] = (uint16_t)(low >> c);
+    s = c + d - 24;
+    low &= m;
+    enc->offs = offs;
+  }
+  enc->low = low << d;
+  enc->rng = rng << d;
+  enc->cnt = s;
+}
+
+/*Initializes the encoder.
+  size: The initial size of the buffer, in bytes.*/
+void od_ec_enc_init(od_ec_enc *enc, uint32_t size) {
+  od_ec_enc_reset(enc); /*Zeroes enc->error, so it must run first.*/
+  enc->buf = (unsigned char *)malloc(sizeof(*enc->buf)*size);
+  enc->storage = size;
+  if (size > 0 && enc->buf == NULL) { /*malloc(0) may return NULL.*/
+    enc->storage = 0;
+    enc->error = -1; /*od_ec_enc_done() then returns NULL.*/
+  }
+  enc->precarry_buf =
+   (uint16_t *)malloc(sizeof(*enc->precarry_buf)*size);
+  enc->precarry_storage = size;
+  if (size > 0 && enc->precarry_buf == NULL) {
+    enc->precarry_storage = 0;
+    enc->error = -1;
+  }
+}
+
+/*Reinitializes the encoder.*/
+void od_ec_enc_reset(od_ec_enc *enc) {
+  enc->end_offs = 0;
+  enc->end_window = 0;
+  enc->nend_bits = 0;
+  enc->offs = 0;
+  enc->low = 0;
+  enc->rng = 0x8000;
+  /*This is initialized to -9 so that it crosses zero after we've accumulated
+     one byte + one carry bit.*/
+  enc->cnt = -9;
+  enc->error = 0;
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy = 0;
+  enc->nb_symbols = 0;
+#endif
+}
+
+/*Frees the buffers used by the encoder.*/
+void od_ec_enc_clear(od_ec_enc *enc) {
+  free(enc->precarry_buf);
+  free(enc->buf);
+}
+
+/*Encodes a symbol given its scaled frequency information.
+  The frequency information must be discernable by the decoder, assuming it
+   has read only the previous symbols from the stream.
+  You can change the frequency information, or even the entire source alphabet,
+   so long as the decoder can tell from the context of the previously encoded
+   information that it is supposed to do so as well.
+  fl: The cumulative frequency of all symbols that come before the one to be
+       encoded.
+  fh: The cumulative frequency of all symbols up to and including the one to
+       be encoded.
+      Together with fl, this defines the range [fl, fh) in which the decoded
+       value will fall.
+  ft: The sum of the frequencies of all the symbols.
+      This must be at least 16384, and no more than 32768.*/
+static void od_ec_encode(od_ec_enc *enc,
+ unsigned fl, unsigned fh, unsigned ft) {
+  od_ec_window l;
+  unsigned r;
+  int s;
+  unsigned d;
+  unsigned u;
+  unsigned v;
+  OD_ASSERT(fl < fh);
+  OD_ASSERT(fh <= ft);
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  l = enc->low;
+  r = enc->rng;
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft;
+  ft <<= s;
+  fl <<= s;
+  fh <<= s;
+  d = r - ft;
+  OD_ASSERT(d < ft);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned e;
+    e = OD_SUBSATU(2*d, ft);
+    u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+    v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+  }
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u;
+  l += u;
+  od_ec_enc_normalize(enc, l, r);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(fh - fl)/ft);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Equivalent to od_ec_encode() with ft == 32768.
+  fl: The cumulative frequency of all symbols that come before the one to be
+       encoded.
+  fh: The cumulative frequency of all symbols up to and including the one to
+       be encoded.*/
+static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
+  od_ec_window l;
+  unsigned r;
+  unsigned d;
+  unsigned u;
+  unsigned v;
+  OD_ASSERT(fl < fh);
+  OD_ASSERT(fh <= 32768U);
+  l = enc->low;
+  r = enc->rng;
+  OD_ASSERT(32768U <= r);
+  d = r - 32768U;
+  OD_ASSERT(d < 32768U);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned e;
+    e = OD_SUBSATU(2*d, 32768U);
+    u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+    v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+  }
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u;
+  l += u;
+  od_ec_enc_normalize(enc, l, r);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(fh - fl)/32768.);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Encodes a symbol given its frequency information with an arbitrary scale.
+  This operates just like od_ec_encode(), but does not require that ft be at
+   least 16384.
+  fl: The cumulative frequency of all symbols that come before the one to be
+       encoded.
+  fh: The cumulative frequency of all symbols up to and including the one to
+       be encoded.
+  ft: The sum of the frequencies of all the symbols.
+      This must be at least 2 and no more than 32768.*/
+static void od_ec_encode_unscaled(od_ec_enc *enc,
+ unsigned fl, unsigned fh, unsigned ft) {
+  int s;
+  OD_ASSERT(fl < fh);
+  OD_ASSERT(fh <= ft);
+  OD_ASSERT(2 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  s = 15 - OD_ILOG_NZ(ft - 1);
+  od_ec_encode(enc, fl << s, fh << s, ft << s);
+}
+
+/*Encode a bit that has an fz/ft probability of being a zero.
+  val: The value to encode (0 or 1).
+  fz: The probability that val is zero, scaled by ft.
+  ft: The total probability.
+      This must be at least 16384 and no more than 32768.*/
+void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned ft) {
+  od_ec_window l;
+  unsigned r;
+  int s;
+  unsigned v;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < ft);
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  l = enc->low;
+  r = enc->rng;
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft;
+  ft <<= s;
+  fz <<= s;
+  OD_ASSERT(r - ft < ft);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned d;
+    unsigned e;
+    d = r - ft;
+    e = OD_SUBSATU(2*d, ft);
+    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
+  }
+#else
+  v = fz + OD_MINI(fz, r - ft);
+#endif
+  if (val) l += v;
+  r = val ? r - v : v;
+  od_ec_enc_normalize(enc, l, r);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(val ? ft - fz : fz)/ft);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Equivalent to od_ec_encode_bool() with ft == 32768.
+  val: The value to encode (0 or 1).
+  fz: The probability that val is zero, scaled by 32768.*/
+void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz) {
+  od_ec_window l;
+  unsigned r;
+  unsigned v;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < 32768U);
+  l = enc->low;
+  r = enc->rng;
+  OD_ASSERT(32768U <= r);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned d;
+    unsigned e;
+    d = r - 32768U;
+    e = OD_SUBSATU(2*d, 32768U);
+    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
+  }
+#else
+  v = fz + OD_MINI(fz, r - 32768U);
+#endif
+  if (val) l += v;
+  r = val ? r - v : v;
+  od_ec_enc_normalize(enc, l, r);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(val ? 32768 - fz : fz)/32768.);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Encodes a symbol given a cumulative distribution function (CDF) table.
+  s: The index of the symbol to encode.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be at least 16384, and no more than 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.*/
+void od_ec_encode_cdf(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms) {
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  od_ec_encode(enc, s > 0 ? cdf[s - 1] : 0, cdf[s], cdf[nsyms - 1]);
+}
+
+/*Equivalent to od_ec_encode_cdf() with cdf[nsyms - 1] == 32768U.
+  s: The index of the symbol to encode.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be exactly 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.*/
+void od_ec_encode_cdf_q15(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms) {
+  (void)nsyms;
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  OD_ASSERT(cdf[nsyms - 1] == 32768U);
+  od_ec_encode_q15(enc, s > 0 ? cdf[s - 1] : 0, cdf[s]);
+}
+
+/*Encodes a symbol given a cumulative distribution function (CDF) table.
+  s: The index of the symbol to encode.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be at least 2, and no more than 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.*/
+void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms) {
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  od_ec_encode_unscaled(enc, s > 0 ? cdf[s - 1] : 0, cdf[s], cdf[nsyms - 1]);
+}
+
+/*Equivalent to od_ec_encode_cdf() with cdf[nsyms - 1] == 1 << ftb.
+  s: The index of the symbol to encode.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be exactly 1 << ftb.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  ftb: The number of bits of precision in the cumulative distribution.
+       This must be no more than 15.*/
+void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms, unsigned ftb) {
+  (void)nsyms;
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  OD_ASSERT(ftb <= 15);
+  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
+  od_ec_encode_q15(enc,
+   s > 0 ? cdf[s - 1] << (15 - ftb) : 0, cdf[s] << (15 - ftb));
+}
+
+/*Encodes a raw unsigned integer in the stream.
+  fl: The integer to encode.
+  ft: The number of integers that can be encoded (one more than the max).
+      This must be at least 2, and no more than 2**29.*/
+void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) {
+  OD_ASSERT(ft >= 2);
+  OD_ASSERT(fl < ft);
+  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
+  if (ft > 1U << OD_EC_UINT_BITS) {
+    int ft1;
+    int ftb;
+    ft--;
+    ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
+    ft1 = (int)(ft >> ftb) + 1;
+    od_ec_encode_cdf_q15(enc, (int)(fl >> ftb), OD_UNIFORM_CDF_Q15(ft1), ft1);
+    od_ec_enc_bits(enc, fl & (((uint32_t)1 << ftb) - 1), ftb);
+  }
+  else od_ec_encode_cdf_q15(enc, (int)fl, OD_UNIFORM_CDF_Q15(ft), (int)ft);
+}
+
+/*Encodes a sequence of raw bits in the stream.
+  fl: The bits to encode.
+  ftb: The number of bits to encode.
+       This must be between 0 and 25, inclusive.*/
+void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb) {
+  od_ec_window end_window;
+  int nend_bits;
+  OD_ASSERT(ftb <= 25);
+  OD_ASSERT(fl < (uint32_t)1 << ftb);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy += ftb;
+#endif
+  end_window = enc->end_window;
+  nend_bits = enc->nend_bits;
+  if (nend_bits + ftb > OD_EC_WINDOW_SIZE) { /*Spill whole bytes first.*/
+    unsigned char *buf;
+    uint32_t storage;
+    uint32_t end_offs;
+    buf = enc->buf;
+    storage = enc->storage;
+    end_offs = enc->end_offs;
+    if (end_offs + (OD_EC_WINDOW_SIZE >> 3) >= storage) {
+      unsigned char *new_buf;
+      uint32_t new_storage;
+      new_storage = 2*storage + (OD_EC_WINDOW_SIZE >> 3);
+      new_buf = (unsigned char *)malloc(sizeof(*new_buf)*new_storage);
+      if (new_buf == NULL) { /*enc->buf is left as it was.*/
+        enc->error = -1;
+        enc->end_offs = 0;
+        return;
+      }
+      OD_COPY(new_buf + new_storage - end_offs,
+       buf + storage - end_offs, end_offs); /*Raw bytes stay at the tail.*/
+      storage = new_storage;
+      free(buf);
+      enc->buf = buf = new_buf;
+      enc->storage = storage;
+    }
+    do {
+      OD_ASSERT(end_offs < storage);
+      buf[storage - ++end_offs] = (unsigned char)end_window; /*Back to front.*/
+      end_window >>= 8;
+      nend_bits -= 8;
+    }
+    while (nend_bits >= 8);
+    enc->end_offs = end_offs;
+  }
+  OD_ASSERT(nend_bits + ftb <= OD_EC_WINDOW_SIZE);
+  end_window |= (od_ec_window)fl << nend_bits; /*Above the pending bits.*/
+  nend_bits += ftb;
+  enc->end_window = end_window;
+  enc->nend_bits = nend_bits;
+}
+
+/*Overwrites a few bits at the very start of an existing stream, after they
+   have already been encoded.
+  This makes it possible to have a few flags up front, where it is easy for
+   decoders to access them without parsing the whole stream, even if their
+   values are not determined until late in the encoding process, without having
+   to buffer all the intermediate symbols in the encoder.
+  In order for this to work, at least nbits bits must have already been encoded
+   using probabilities that are an exact power of two.
+  The encoder can verify the number of encoded bits is sufficient, but cannot
+   check this latter condition.
+  val: The bits to encode (in the nbits least significant bits).
+       They will be decoded in order from most-significant to least.
+  nbits: The number of bits to overwrite.
+         This must be no more than 8.*/
+void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) {
+  int shift;
+  unsigned mask;
+  OD_ASSERT(nbits >= 0);
+  OD_ASSERT(nbits <= 8);
+  OD_ASSERT(val < 1U << nbits);
+  shift = 8 - nbits; /*Moves val into the top nbits of an 8-bit byte.*/
+  mask = ((1U << nbits) - 1) << shift; /*Selects those same top bits.*/
+  if (enc->offs > 0) {
+    /*The first byte is already in the pre-carry buffer; patch it there.*/
+    enc->precarry_buf[0] =
+     (uint16_t)((enc->precarry_buf[0] & ~mask) | val << shift);
+  }
+  else if (9 + enc->cnt + (enc->rng == 0x8000) > nbits) {
+    /*The first byte has yet to be output; patch it inside low instead.*/
+    enc->low = (enc->low & ~((od_ec_window)mask << (16 + enc->cnt))) |
+     (od_ec_window)val << (16 + enc->cnt + shift);
+  }
+  /*The encoder hasn't even encoded nbits of data yet.*/
+  else enc->error = -1;
+}
+
+#if OD_MEASURE_EC_OVERHEAD
+# include <stdio.h>
+#endif
+
+/*Indicates that there are no more symbols to encode.
+  All remaining output bytes are flushed to the output buffer.
+  od_ec_enc_reset() should be called before using the encoder again.
+  nbytes: Returns the size of the encoded data in the returned buffer.
+  Return: A pointer to the start of the final buffer, or NULL if there was an
+           encoding error.*/
+unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) {
+  unsigned char *out;
+  uint32_t storage;
+  uint16_t *buf;
+  uint32_t offs;
+  uint32_t end_offs;
+  int nend_bits;
+  od_ec_window m;
+  od_ec_window e;
+  od_ec_window l;
+  unsigned r;
+  int c;
+  int s;
+  if (enc->error) return NULL;
+#if OD_MEASURE_EC_OVERHEAD
+  {
+    uint32_t tell;
+    /* Don't count the 1 bit we lose to raw bits as overhead. */
+    tell = od_ec_enc_tell_frac(enc)/8.-1;
+    fprintf(stderr, "overhead: %f%%\n", 100*(tell-enc->entropy)/enc->entropy);
+    fprintf(stderr, "efficiency: %f bits/symbol\n",
+     (double)tell/enc->nb_symbols);
+  }
+#endif
+  /*We output the minimum number of bits that ensures that the symbols encoded
+     thus far will be decoded correctly regardless of the bits that follow.*/
+  l = enc->low;
+  r = enc->rng;
+  c = enc->cnt;
+  s = 9;
+  m = 0x7FFF;
+  e = (l + m) & ~m;
+  while ((e | m) >= l + r) {
+    s++;
+    m >>= 1;
+    e = (l + m) & ~m;
+  }
+  s += c;
+  offs = enc->offs;
+  buf = enc->precarry_buf;
+  if (s > 0) {
+    unsigned n;
+    storage = enc->precarry_storage;
+    if (offs + ((s + 7) >> 3) > storage) {
+      storage = storage*2 + ((s + 7) >> 3);
+      buf = (uint16_t *)realloc(buf, sizeof(*buf)*storage);
+      if (buf == NULL) {
+        enc->error = -1;
+        return NULL;
+      }
+      enc->precarry_buf = buf;
+      enc->precarry_storage = storage;
+    }
+    n = (1 << (c + 16)) - 1;
+    do {
+      OD_ASSERT(offs < storage);
+      buf[offs++] = (uint16_t)(e >> (c + 16));
+      e &= n;
+      s -= 8;
+      c -= 8;
+      n >>= 8;
+    }
+    while (s > 0);
+  }
+  /*Make sure there's enough room for the entropy-coded bits and the raw
+     bits.*/
+  out = enc->buf;
+  storage = enc->storage;
+  end_offs = enc->end_offs;
+  e = enc->end_window;
+  nend_bits = enc->nend_bits;
+  s = -s;
+  c = OD_MAXI((nend_bits - s + 7) >> 3, 0);
+  if (offs + end_offs + c > storage) {
+    storage = offs + end_offs + c;
+    out = (unsigned char *)realloc(out, sizeof(*out)*storage);
+    if (out == NULL) {
+      enc->error = -1;
+      return NULL;
+    }
+    OD_MOVE(out + storage - end_offs, out + enc->storage - end_offs, end_offs);
+    enc->buf = out;
+    enc->storage = storage;
+  }
+  /*If we have buffered raw bits, flush them as well.*/
+  while (nend_bits > s) {
+    OD_ASSERT(end_offs < storage);
+    out[storage - ++end_offs] = (unsigned char)e;
+    e >>= 8;
+    nend_bits -= 8;
+  }
+  *nbytes = offs + end_offs;
+  /*Perform carry propagation.*/
+  OD_ASSERT(offs + end_offs <= storage);
+  out = out + storage - (offs + end_offs);
+  c = 0;
+  end_offs = offs;
+  while (offs-- > 0) {
+    c = buf[offs] + c;
+    out[offs] = (unsigned char)c;
+    c >>= 8;
+  }
+  /*Add any remaining raw bits to the last byte.
+    There is guaranteed to be enough room, because nend_bits <= s.*/
+  OD_ASSERT(nend_bits <= 0 || end_offs > 0);
+  if (nend_bits > 0) out[end_offs - 1] |= (unsigned char)e;
+  /*Note: Unless there's an allocation error, if you keep encoding into the
+     current buffer and call this function again later, everything will work
+     just fine (you won't get a new packet out, but you will get a single
+     buffer with the new data appended to the old).
+    However, this function is O(N) where N is the amount of data coded so far,
+     so calling it more than once for a given packet is a bad idea.*/
+  return out;
+}
+
+/*Returns the number of bits "used" by the encoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Warning: The value returned by this function can decrease compared to an
+   earlier call, even after encoding more data, if there is an encoding error
+   (i.e., a failure to allocate enough space for the output buffer).
+  Return: The number of bits.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+int od_ec_enc_tell(od_ec_enc *enc) {
+  /*The 10 here counteracts the offset of -9 baked into cnt, and adds 1 extra
+     bit, which we reserve for terminating the stream.*/
+  return (enc->offs + enc->end_offs)*8 + enc->cnt + enc->nend_bits + 10;
+}
+
+/*Returns the number of bits "used" by the encoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Warning: The value returned by this function can decrease compared to an
+   earlier call, even after encoding more data, if there is an encoding error
+   (i.e., a failure to allocate enough space for the output buffer).
+  Return: The number of bits scaled by 2**OD_BITRES.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+uint32_t od_ec_enc_tell_frac(od_ec_enc *enc) {
+  return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng);
+}
+
+/*Saves an entropy coder checkpoint to dst.
+  This allows an encoder to reverse a series of entropy coder
+   decisions if it decides that the information would have been
+   better coded some other way.*/
+void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src) {
+  OD_COPY(dst, src, 1);
+}
+
+/*Restores an entropy coder checkpoint saved by od_ec_enc_checkpoint.
+  This can only be used to restore from checkpoints earlier in the target
+   state's history: you can not switch backwards and forwards or otherwise
+   switch to a state which isn't a causal ancestor of the current state.
+  Restore is also incompatible with patching the initial bits, as the
+   changes will remain in the restored version.*/
+void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src) {
+  unsigned char *buf;
+  uint32_t storage;
+  uint16_t *precarry_buf;
+  uint32_t precarry_storage;
+  OD_ASSERT(dst->storage >= src->storage); /*Buffers only ever grow.*/
+  OD_ASSERT(dst->precarry_storage >= src->precarry_storage);
+  buf = dst->buf; /*Save dst's live buffers across the copy below.*/
+  storage = dst->storage;
+  precarry_buf = dst->precarry_buf;
+  precarry_storage = dst->precarry_storage;
+  OD_COPY(dst, src, 1); /*Presumably copies the whole struct; TODO confirm.*/
+  dst->buf = buf; /*src's pointers may be stale if dst grew since.*/
+  dst->storage = storage;
+  dst->precarry_buf = precarry_buf;
+  dst->precarry_storage = precarry_storage;
+}
diff --git a/enc/entenc.h b/enc/entenc.h
new file mode 100644
index 0000000..e2b1ae0
--- /dev/null
+++ b/enc/entenc.h
@@ -0,0 +1,105 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_entenc_H)
+# define _entenc_H (1)
+# include <stddef.h>
+# include "entcode.h"
+typedef struct od_ec_enc od_ec_enc;
+
+#define OD_MEASURE_EC_OVERHEAD (0)
+
+/*The entropy encoder context.*/
+struct od_ec_enc {
+  /*Buffered output.
+    This contains only the raw bits until the final call to od_ec_enc_done(),
+     where all the arithmetic-coded data gets prepended to it.*/
+  unsigned char *buf;
+  /*The size of the buffer.*/
+  uint32_t storage;
+  /*The offset at which the last byte containing raw bits was written.*/
+  uint32_t end_offs;
+  /*Bits that will be read from/written at the end.*/
+  od_ec_window end_window;
+  /*Number of valid bits in end_window.*/
+  int nend_bits;
+  /*A buffer for output bytes with their associated carry flags.*/
+  uint16_t *precarry_buf;
+  /*The size of the pre-carry buffer.*/
+  uint32_t precarry_storage;
+  /*The offset at which the next entropy-coded byte will be written.*/
+  uint32_t offs;
+  /*The low end of the current range.*/
+  od_ec_window low;
+  /*The number of values in the current range.*/
+  uint16_t rng;
+  /*The number of bits of data in the current value.*/
+  int16_t cnt;
+  /*Nonzero if an error occurred.*/
+  int error;
+#if OD_MEASURE_EC_OVERHEAD
+  double entropy; /*NOTE(review): presumably the accumulated ideal cost in bits; confirm.*/
+  int nb_symbols; /*NOTE(review): presumably the number of symbols coded so far; confirm.*/
+#endif
+};
+
+/*See entenc.c for further documentation.*/
+
+void od_ec_enc_init(od_ec_enc *enc, uint32_t size) OD_ARG_NONNULL(1);
+void od_ec_enc_reset(od_ec_enc *enc) OD_ARG_NONNULL(1);
+void od_ec_enc_clear(od_ec_enc *enc) OD_ARG_NONNULL(1);
+
+void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned _ft)
+ OD_ARG_NONNULL(1);
+void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz_q15)
+ OD_ARG_NONNULL(1);
+void od_ec_encode_cdf(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+void od_ec_encode_cdf_q15(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
+ const uint16_t *cdf, int nsyms, unsigned ftb)
+ OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+
+void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft)
+OD_ARG_NONNULL(1);
+
+void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb)
+ OD_ARG_NONNULL(1);
+
+void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits)
+ OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT unsigned char *od_ec_enc_done(od_ec_enc *enc,
+ uint32_t *nbytes) OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+
+OD_WARN_UNUSED_RESULT int od_ec_enc_tell(od_ec_enc *enc) OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(od_ec_enc *enc)
+ OD_ARG_NONNULL(1);
+
+void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
+void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src);
+
+#endif
diff --git a/enc/mainenc.c b/enc/mainenc.c
index aaec81c..7e94074 100644
--- a/enc/mainenc.c
+++ b/enc/mainenc.c
@@ -171,11 +171,7 @@ int main(int argc, char **argv)
 
   /* Initialize main bit stream */
   stream_t stream;
-  stream.bitstream = (uint8_t *)malloc(MAX_BUFFER_SIZE * sizeof(uint8_t));
-  stream.bitbuf = 0;
-  stream.bitrest = 32;
-  stream.bytepos = 0;
-  stream.bytesize = MAX_BUFFER_SIZE;
+  od_ec_enc_init(&stream, MAX_BUFFER_SIZE);
 
   /* Configure encoder */
   encoder_info.params = params;
@@ -569,7 +565,7 @@ int main(int argc, char **argv)
       fflush(stdout);
 
       /* Write compressed bits for this frame to file */
-      flush_bytebuf(&stream, strfile);
+      flush_all_bits(&stream, strfile);
 
       if (reconfile){
         /* Write output frame */
@@ -601,7 +597,6 @@ int main(int argc, char **argv)
   }
 
 
-  flush_all_bits(&stream, strfile);
   bit_rate_in_kbps = 0.001*params->frame_rate*(double)acc_num_bits/num_encoded_frames;
 
   /* Finised encoding sequence */
@@ -651,7 +646,7 @@ int main(int argc, char **argv)
   {
     fclose(reconfile);
   }
-  free(stream.bitstream);
+  od_ec_enc_clear(&stream);
   free(encoder_info.deblock_data);
   delete_config_params(params);
   return 0;
diff --git a/enc/putbits.c b/enc/putbits.c
index 85345f1..427d2f9 100644
--- a/enc/putbits.c
+++ b/enc/putbits.c
@@ -30,109 +30,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "global.h"
 #include "putbits.h"
 
-static unsigned int mask[33] = {
-    0x00000000,0x00000001,0x00000003,0x00000007,
-    0x0000000f,0x0000001f,0x0000003f,0x0000007f,
-    0x000000ff,0x000001ff,0x000003ff,0x000007ff,
-    0x00000fff,0x00001fff,0x00003fff,0x00007fff,
-    0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff,
-    0x000fffff,0x001fffff,0x003fffff,0x007fffff,
-    0x00ffffff,0x01ffffff,0x03ffffff,0x07ffffff,
-    0x0fffffff,0x1fffffff,0x3fffffff,0x7fffffff,
-    0xffffffff};
-
-void flush_bytebuf(stream_t *str, FILE *outfile)
-{
-  if (outfile)
-  {
-    if (fwrite(str->bitstream,sizeof(unsigned char),str->bytepos,outfile) != str->bytepos)
-    {
-      fatalerror("Problem writing bitstream to file.");
-    }
-  }
-  str->bytepos = 0;
-}
-
-
 void flush_all_bits(stream_t *str, FILE *outfile)
 {
+  unsigned char *buf; /*Finished data returned by od_ec_enc_done().*/
+  uint32_t frame_bytes; /*Byte count reported through od_ec_enc_done()'s nbytes argument.*/
   int i;
-  int bytes = 4 - str->bitrest/8;
-
-  printf("final flush: bytes=%4d\n",bytes);
-  if ((str->bytepos+bytes) > str->bytesize)
-  {
-    flush_bytebuf(str,outfile);
-  }
-  for (i = 0; i < bytes; i++)
-  {
-    str->bitstream[str->bytepos++] = (str->bitbuf >> (24-i*8)) & 0xff;
-  }
-
+  buf = od_ec_enc_done(str, &frame_bytes); /*NOTE(review): result is used unchecked; confirm it cannot fail here.*/
   if (outfile)
   {
-    if (fwrite(str->bitstream,sizeof(unsigned char),str->bytepos,outfile) != str->bytepos)
+    uint8_t frame_bytes_buf[4]; /*Length prefix written ahead of the data.*/
+    for (i = 0; i < 4; i++)
+    {
+      frame_bytes_buf[i] = (uint8_t)(frame_bytes >> (24 - i*8)); /*Most significant byte first.*/
+    }
+    if (fwrite(frame_bytes_buf, sizeof(frame_bytes_buf), 1, outfile) != 1 /*4-byte length...*/
+     || fwrite(buf, sizeof(*buf), frame_bytes, outfile) != frame_bytes) /*...then the data.*/
     {
       fatalerror("Problem writing bitstream to file.");
     }
   }
-  str->bytepos = 0;
-}    
-                    
-
-void flush_bitbuf(stream_t *str)
-{
-  if ((str->bytepos+4) > str->bytesize)
-  {
-    fatalerror("Run out of bits in stream buffer.");
-  }
-  str->bitstream[str->bytepos++] = (str->bitbuf >> 24) & 0xff;
-  str->bitstream[str->bytepos++] = (str->bitbuf >> 16) & 0xff;
-  str->bitstream[str->bytepos++] = (str->bitbuf >> 8) & 0xff;
-  str->bitstream[str->bytepos++] = str->bitbuf & 0xff;
-  str->bitbuf = 0;
-  str->bitrest = 32;
-}
-
-void putbits(unsigned int n, unsigned int val, stream_t *str)
-{
-  unsigned int rest;
-
-  if (n <= str->bitrest)
-  {
-    str->bitbuf |= ((val & mask[n]) << (str->bitrest-n));
-    str->bitrest -= n;
-  }
-  else
-  {
-    rest = n-str->bitrest;
-    str->bitbuf |= (val >> rest) & mask[n-rest];
-    flush_bitbuf(str);
-    str->bitbuf |= (val & mask[rest]) << (32-rest);
-    str->bitrest -= rest;
-  }
-}
-
-int get_bit_pos(stream_t *str){
-  int bitpos = 8*str->bytepos + (32 - str->bitrest);
-  return bitpos; 
-}
-
-void write_stream_pos(stream_t *stream, stream_pos_t *stream_pos){
-  stream->bitrest = stream_pos->bitrest;
-  stream->bytepos = stream_pos->bytepos;
-  stream->bitbuf = stream_pos->bitbuf;
-}
-
-void read_stream_pos(stream_pos_t *stream_pos, stream_t *stream){
-  stream_pos->bitrest = stream->bitrest;
-  stream_pos->bytepos = stream->bytepos;
-  stream_pos->bitbuf = stream->bitbuf;
-}
-
-void copy_stream(stream_t *str1, stream_t *str2){
-  str1->bitrest = str2->bitrest;
-  str1->bytepos = str2->bytepos;
-  str1->bitbuf = str2->bitbuf;
-  memcpy(&(str1->bitstream[0]),&(str2->bitstream[0]),str2->bytepos*sizeof(uint8_t));
+  od_ec_enc_reset(str); /*Reset the encoder once its output has been handed off.*/
 }
diff --git a/enc/putbits.h b/enc/putbits.h
index 855bde7..5dba40e 100644
--- a/enc/putbits.h
+++ b/enc/putbits.h
@@ -31,32 +31,42 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdio.h>
 #include <stdint.h>
+#include "entenc.h"
 
-typedef struct
+typedef struct od_ec_enc stream_t;
+
+typedef struct od_ec_enc stream_pos_t;
+
+void flush_all_bits(stream_t *str, FILE *outfile);
+
+static inline uint32_t bitreverse(uint32_t val) /*Returns val with its 32 bits in reverse order.*/
 {
-  uint32_t bytesize;     //Buffer size - typically maximum compressed frame size
-  uint32_t bytepos;      //Byte position in bitstream
-  uint8_t *bitstream;   //Compressed bit stream
-  uint32_t bitbuf;       //Recent bits not written the bitstream yet
-  uint32_t bitrest;      //Empty bits in bitbuf
-} stream_t;
-
-typedef struct
+  val = ((val >> 16) & 0x0000FFFFU) | ((val << 16) & 0xFFFF0000U); /*Swap 16-bit halves.*/
+  val = ((val >> 8) & 0x00FF00FFU) | ((val << 8) & 0xFF00FF00U); /*Swap bytes in each half.*/
+  val = ((val >> 4) & 0x0F0F0F0FU) | ((val << 4) & 0xF0F0F0F0U); /*Swap nibbles in each byte.*/
+  val = ((val >> 2) & 0x33333333U) | ((val << 2) & 0xCCCCCCCCU); /*Swap bit pairs.*/
+  return ((val >> 1) & 0x55555555U) | ((val << 1) & 0xAAAAAAAAU); /*Swap adjacent bits.*/
+}
+
+static inline void putbits(unsigned int n,unsigned int val,stream_t *str) /*Sends the n low bits of val to od_ec_enc_bits().*/
 {
-  uint32_t bytepos;      //Byte position in bitstream
-  uint32_t bitbuf;       //Recent bits not written the bitstream yet
-  uint32_t bitrest;      //Empty bits in bitbuf
-} stream_pos_t;
+  OD_ASSERT(n > 0 && n <= 32); /*Keeps the shift count 32 - n within [0, 31].*/
+  od_ec_enc_bits(str, bitreverse(val << (32 - n)), n); /*Shift the n low bits to the top, then reverse them into the low n bits.*/
+}
 
-void flush_all_bits(stream_t *str, FILE *outfile);
-void putbits(unsigned int n,unsigned int val,stream_t *str);
-void flush_bytebuf(stream_t *str, FILE *outfile);
-void flush_bitbuf(stream_t *str);
-int get_bit_pos(stream_t *str);
-unsigned int leading_zeros(unsigned int code);
-
-void write_stream_pos(stream_t *stream, stream_pos_t *stream_pos);
-void read_stream_pos(stream_pos_t *stream_pos, stream_t *stream);
-void copy_stream(stream_t *str1, stream_t *str2);
+static inline int get_bit_pos(stream_t *str)
+{
+  return od_ec_enc_tell(str); /*Position is whatever od_ec_enc_tell() reports for this encoder.*/
+}
+
+static inline void write_stream_pos(stream_t *stream, stream_pos_t *stream_pos)
+{
+  od_ec_enc_rollback(stream, stream_pos); /*Restores stream from the state held in stream_pos.*/
+}
+
+static inline void read_stream_pos(stream_pos_t *stream_pos, stream_t *stream)
+{
+  od_ec_enc_checkpoint(stream_pos, stream); /*Saves the current state of stream into stream_pos.*/
+}
 
 #endif
diff --git a/enc/write_bits.c b/enc/write_bits.c
index a7e2119..d745b4e 100644
--- a/enc/write_bits.c
+++ b/enc/write_bits.c
@@ -371,8 +371,10 @@ void write_super_mode(stream_t *stream,write_data_t *write_data, int split_flag)
   }
   else{
     /* Split flag = 0 */
-    if (size > MIN_BLOCK_SIZE || split_flag==1)
+    if (!write_data->encode_rectangular_size
+     && (size > MIN_BLOCK_SIZE || split_flag==1)){
       putbits(1,split_flag,stream);
+    }
   }
 }