From 9c3bbcd876c8d8c51f1525c9db7e9769ae13d8b0 Mon Sep 17 00:00:00 2001 From: Bartek Fabiszewski Date: Sun, 29 Jun 2014 14:00:00 +0100 Subject: [PATCH] merge master --- COPYING | 165 ++ Makefile.am | 5 +- README | 0 README.md | 64 + autogen.sh | 1 + configure.ac | 194 +- libmobi.pc.in | 12 + src/Makefile.am | 19 +- src/buffer.c | 628 +++++- src/buffer.h | 92 +- src/compression.c | 271 ++- src/compression.h | 44 +- src/config.h | 16 + src/debug.c | 66 +- src/debug.h | 67 +- src/index.c | 402 ++++ src/index.h | 80 + src/memory.c | 403 +++- src/memory.h | 31 +- src/miniz.c | 4919 +++++++++++++++++++++++++++++++++++++++++++++ src/miniz.h | 19 + src/mobi.h | 737 ++++--- src/opf.c | 1868 +++++++++++++++++ src/opf.h | 164 ++ src/parse_rawml.c | 1656 +++++++++++++++ src/parse_rawml.h | 33 + src/read.c | 825 +++++--- src/read.h | 30 +- src/util.c | 1795 +++++++++++++++-- src/util.h | 144 +- src/write.c | 91 +- src/write.h | 28 +- tools/Makefile.am | 10 +- tools/mobitool.1 | 77 +- tools/mobitool.c | 583 +++++- 35 files changed, 14175 insertions(+), 1364 deletions(-) create mode 100644 COPYING delete mode 100644 README create mode 100644 README.md create mode 100644 libmobi.pc.in create mode 100644 src/config.h create mode 100644 src/index.c create mode 100644 src/index.h create mode 100644 src/miniz.c create mode 100644 src/miniz.h create mode 100644 src/opf.c create mode 100644 src/opf.h create mode 100644 src/parse_rawml.c create mode 100644 src/parse_rawml.h diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/Makefile.am b/Makefile.am index c6e73f5..1bd5f0e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,7 +2,6 @@ SUBDIRS = src tools ACLOCAL_AMFLAGS = -I m4 -# what flags you want to pass to the C compiler & linker -AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2 -AM_LDFLAGS = +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libmobi.pc diff --git a/README b/README deleted file mode 100644 index e69de29..0000000 diff --git a/README.md b/README.md new file mode 100644 index 0000000..10f63ea --- /dev/null +++ b/README.md @@ -0,0 +1,64 @@ +# Libmobi + +C library for handling Mobipocket (MOBI) ebook format documents. +It is in a beta stage currently marked as version 0.1. + +There is a simple program included in the project: mobitool.c. +It may serve as an example of how to use the library. + +## What works: +- reading and parsing: + - some older text Palmdoc formats, + - Mobipocket files, + - newer MOBI files including KF8 format, + - Replica Print files +- recreating source files using indices +- reconstructing references (links and embedded) in html files +- reconstructing source structure that can be fed back to kindlegen + +## Todo: +- reconstruct dictionaries +- process RESC records +- exporting to EPUB documents +- writing MOBI documents + +## Doxygen documentation: +- [functions](http://www.fabiszewski.net/libmobi/group__mobi__export.html), +- [structures for the raw, unparsed records metadata and data](http://www.fabiszewski.net/libmobi/group__raw__structs.html), +- [structures for the parsed records metadata and data](http://www.fabiszewski.net/libmobi/group__parsed__structs.html), +- [enums](http://www.fabiszewski.net/libmobi/group__mobi__enums.html) + +## Source: +- [on github](https://github.com/bfabiszewski/libmobi/) + +## Installation: + + $ ./autogen.sh + $ ./configure + $ make + $ sudo make install + +## Usage +- single include file: `#include <mobi.h>` +- linker flag: `-lmobi` + +## Requirements +- compiler supporting C99 +- tested with gcc (>=4.2.4), 
clang (llvm >=3.4) +- builds on Linux, MacOS X, Windows (MinGW) +- works cross-compiled on Kindle :) +- zlib (optional, configure --with-zlib=no to use included miniz.c instead) +- libxml2 (optional, enables OPF handling, configure --with-libxml2=no to disable) + +## License: +- LGPL, either version 3, or any later + +## Credits: +- The huffman decompression and KF8 parsing algorithms were learned by studying python source code of [KindleUnpack](http://wiki.mobileread.com/wiki/KindleUnpack) distributed with following license: + + Based on initial mobipocket version Copyright © 2009 Charles M. Hannum + Extensive Extensions and Improvements Copyright © 2009-2014 + By P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding, tkeo. + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3. + +- Thanks to all contributors of Mobileread [MOBI wiki](http://wiki.mobileread.com/wiki/MOBI) diff --git a/autogen.sh b/autogen.sh index b483139..13c49e3 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,2 +1,3 @@ #!/bin/sh +mkdir -p m4 && \ autoreconf --force --install -I m4 diff --git a/configure.ac b/configure.ac index 61d395d..1d5d457 100644 --- a/configure.ac +++ b/configure.ac @@ -6,26 +6,33 @@ AC_INIT([libmobi], [0.1]) AC_CONFIG_SRCDIR([src/buffer.c]) # Enable automake -AM_INIT_AUTOMAKE([-Wall -Werror foreign]) +AM_INIT_AUTOMAKE([-Wall foreign]) # all defined C macros (HAVE_*) will be saved to this file AC_CONFIG_HEADERS([config.h]) AC_CONFIG_MACRO_DIR([m4]) # Checks for programs. -AC_PROG_CC +AC_PROG_CC_C99 +AM_PROG_CC_C_O AC_PROG_INSTALL m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) # Init libtool -LT_INIT +#LT_INIT +AC_PROG_LIBTOOL # Checks for libraries. # Checks for header files. 
-AC_CHECK_HEADERS([stdlib.h string.h]) +AC_HEADER_STDBOOL +AC_CHECK_HEADERS([stdlib.h]) +AC_CHECK_HEADERS([string.h]) +AC_CHECK_HEADERS([utime.h]) +AC_CHECK_HEADERS([sys/resource.h]) # Checks for typedefs, structures, and compiler characteristics. AC_TYPE_INT32_T +AC_TYPE_INT64_T AC_TYPE_INT8_T AC_TYPE_SIZE_T AC_TYPE_UINT16_T @@ -34,11 +41,188 @@ AC_TYPE_UINT64_T AC_TYPE_UINT8_T # Checks for library functions. +AC_FUNC_MKTIME AC_FUNC_MALLOC AC_FUNC_REALLOC -AC_CHECK_FUNCS([memset strrchr]) +AC_CHECK_FUNCS([memset mkdir strdup strrchr strstr strtoul utime]) + +# test for --with-zlib +AC_MSG_CHECKING([if compile with zlib]) +AC_ARG_WITH(zlib, + AC_HELP_STRING([--with-zlib],[Use zlib instead of included miniz @<:@default=yes@:>@]), + [if test "$withval" = yes; then use_zlib=yes; else use_zlib=no; fi], + [use_zlib=yes]) +AC_MSG_RESULT($use_zlib) +AM_CONDITIONAL(USE_ZLIB,[test x$use_zlib = xyes]) +AM_CONDITIONAL(USE_MINIZ,[test x$use_zlib = xno]) +if test x$use_zlib = xyes; then + AC_CHECK_HEADER([zlib.h], + [AC_DEFINE(USE_ZLIB, 1, [Define if you want to use system zlib library]) + LIBZ_LDFLAGS=-lz + MINIZ_CFLAGS=], + [AC_MSG_ERROR([couldn't find zlib header])]) +else + AC_DEFINE(USE_MINIZ, 1, [Define if you want to use included miniz library]) + MINIZ_CFLAGS='-D_POSIX_C_SOURCE=200112L' + LIBZ_LDFLAGS= +fi +AC_SUBST(LIBZ_LDFLAGS) +AC_SUBST(MINIZ_CFLAGS) + +# test for --with-libxml2 +AC_MSG_CHECKING([if compile with libxml2]) +AC_ARG_WITH(libxml2, + AC_HELP_STRING([--with-libxml2],[Compile with libxml2 @<:@default=yes@:>@]), + [if test "$withval" = yes; then use_libxml2=yes; else use_libxml2=no; fi], + [use_libxml2=yes]) +AC_MSG_RESULT($use_libxml2) +AM_CONDITIONAL(USE_LIBXML2,[test x$use_libxml2 = xyes]) +if test x$use_libxml2 = xyes; then + old_CPPFLAGS=$CPPFLAGS + AC_CHECK_PROGS(XML2_CONFIG, xml2-config) + if test -n "$XML2_CONFIG" + then + CPPFLAGS="`$XML2_CONFIG --cflags`" + LIBXML2_CFLAGS="`$XML2_CONFIG --cflags`" + LIBXML2_LDFLAGS="`$XML2_CONFIG --libs`" + else + 
CPPFLAGS=-I/usr/include/libxml2 + LIBXML2_CFLAGS=-I/usr/include/libxml2 + LIBXML2_LDFLAGS=-lxml2 + fi + AC_CHECK_HEADER([libxml/xmlwriter.h], + [AC_DEFINE(USE_LIBXML2, 1, [Define if you want to use libxml2 library])], + [AC_MSG_ERROR([couldn't find libxml2])]) + CPPFLAGS=$old_CPPFLAGS +else + LIBXML2_LDFLAGS= + LIBXML2_CFLAGS= +fi +AC_SUBST(LIBXML2_LDFLAGS) +AC_SUBST(LIBXML2_CFLAGS) + +# Check for -fvisibility=hidden to determine if we can do GNU-style +# visibility attributes for symbol export control +AC_MSG_CHECKING([for -fvisibility=hidden compiler flag]) +VISIBILITY_HIDDEN= +case "$host" in + *-*-mingw*) + # on mingw32 we do -fvisibility=hidden and __declspec(dllexport) + VISIBILITY_HIDDEN='-fvisibility=hidden' + AC_MSG_RESULT(yes) + ;; + *) + # on other compilers, check if we can do -fvisibility=hidden + SAVED_CFLAGS="${CFLAGS}" + CFLAGS="-fvisibility=hidden -Werror" + AC_TRY_COMPILE([], [], + AC_MSG_RESULT(yes) + enable_fvisibility_hidden=yes, + AC_MSG_RESULT(no) + enable_fvisibility_hidden=no) + CFLAGS="${SAVED_CFLAGS}" + + AS_IF([test "${enable_fvisibility_hidden}" = "yes"], [ + VISIBILITY_HIDDEN='-fvisibility=hidden' + ]) + ;; +esac +AC_SUBST(VISIBILITY_HIDDEN) + +# MinGW seems to need this +case "$host" in + *-*-mingw*) + NO_UNDEFINED='-no-undefined' + AVOID_VERSION='-avoid-version' + ISO99_SOURCE='-D_ISOC99_SOURCE=1' + ;; + *) + NO_UNDEFINED= + AVOID_VERSION= + ISO99_SOURCE= + ;; +esac +AC_SUBST(NO_UNDEFINED) +AC_SUBST(AVOID_VERSION) +AC_SUBST(ISO99_SOURCE) + +# Check for non-broken inline under various spellings +AC_MSG_CHECKING([for inline keyword]) +def_inline="" +AC_TRY_COMPILE([], [} __inline__ int foo() { return 0; } +int bar() { return foo();], def_inline="__inline__", +[AC_TRY_COMPILE(, [} __inline int foo() { return 0; } +int bar() { return foo();], def_inline="__inline", +[AC_TRY_COMPILE(, [} inline int foo() { return 0; } +int bar() { return foo();], def_inline="inline")])]) +AC_MSG_RESULT($def_inline) +AC_DEFINE_UNQUOTED([MOBI_INLINE], 
[$def_inline], + [How to obtain function inlining.]) + +# Check --enable-debug +AC_MSG_CHECKING([if enable debugging]) +AC_ARG_ENABLE([debug], +AS_HELP_STRING([--enable-debug], + [enable debugging @<:@default=no@:>@]), + [case "${enableval}" in + yes) debug=yes ;; + no) debug=no ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; + esac],[debug=no]) +AC_MSG_RESULT($debug) + +DEBUG_CFLAGS= +if test x$debug = xyes; then + AC_DEFINE([MOBI_DEBUG], 1, [Enable debugging]) + case "$host" in + *-*-mingw*) + # I give up with std=c99 here for now + # (http://sourceforge.net/p/mingw/bugs/2046/) + DEBUG_CFLAGS='-pedantic -Wall -Wextra -Werror' + ;; + *) + DEBUG_CFLAGS='-std=c99 -pedantic -Wall -Wextra -Werror' + ;; + esac +fi +AC_SUBST(DEBUG_CFLAGS) + +# Check --enable-debug-alloc +AC_MSG_CHECKING([if enable alloc debugging]) +AC_ARG_ENABLE([debug_alloc], +AS_HELP_STRING([--enable-debug-alloc], + [enable memory allocation debugging @<:@default=no@:>@]), + [case "${enableval}" in + yes) debug_alloc=yes ;; + no) debug_alloc=no ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug-alloc]) ;; + esac],[debug_alloc=no]) +AC_MSG_RESULT($debug_alloc) + +if test x$debug_alloc = xyes; then + AC_DEFINE([MOBI_DEBUG_ALLOC], 1, [Enable alloc debugging]) +fi + +# Check --enable-mobitool-static +AC_MSG_CHECKING([if link mobitool against static libmobi]) +AC_ARG_ENABLE([mobitool_static], +AS_HELP_STRING([--enable-mobitool-static], + [link mobitool against static libmobi @<:@default=no@:>@]), + [case "${enableval}" in + yes) mobitool_static=yes ;; + no) mobitool_static=no ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-mobitool-static]) ;; + esac],[mobitool_static=no]) +AC_MSG_RESULT($mobitool_static) +MOBITOOL_STATIC= +if test x$mobitool_static = xyes; then + MOBITOOL_STATIC='-static' +fi +AC_SUBST(MOBITOOL_STATIC) AC_CONFIG_FILES([Makefile]) +AC_CONFIG_FILES([libmobi.pc]) AC_CONFIG_FILES([src/Makefile]) AC_CONFIG_FILES([tools/Makefile]) + AC_OUTPUT diff 
--git a/libmobi.pc.in b/libmobi.pc.in new file mode 100644 index 0000000..fb6409c --- /dev/null +++ b/libmobi.pc.in @@ -0,0 +1,12 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libmobi +Description: MOBI ebook format handling library +URL: http://www.fabiszewski.net/libmobi +Version: @VERSION@ +Requires: +Libs: -L${libdir} -lmobi @LIBZ_LDFLAGS@ +Cflags: -I${includedir} diff --git a/src/Makefile.am b/src/Makefile.am index aebc615..f19c447 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,11 +1,14 @@ # libmobi -# what flags you want to pass to the C compiler & linker -AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2 -AM_LDFLAGS = - -# this lists the binaries to produce, the (non-PHONY, binary) targets in -# the previous manual Makefile lib_LTLIBRARIES = libmobi.la -libmobi_la_SOURCES = buffer.c compression.c debug.c memory.c read.c util.c write.c \ - buffer.h compression.h debug.h memory.h mobi.h read.h util.h write.h +libmobi_la_SOURCES = buffer.c compression.c debug.c index.c memory.c parse_rawml.c read.c util.c write.c \ + buffer.h compression.h config.h debug.h index.h memory.h mobi.h parse_rawml.h read.h util.h write.h +if USE_LIBXML2 +libmobi_la_SOURCES += opf.c opf.h +endif +if USE_MINIZ +libmobi_la_SOURCES += miniz.c miniz.h +endif +include_HEADERS = mobi.h +libmobi_la_LDFLAGS = $(AVOID_VERSION) $(NO_UNDEFINED) $(LIBZ_LDFLAGS) $(LIBXML2_LDFLAGS) +libmobi_la_CFLAGS = $(VISIBILITY_HIDDEN) $(ISO99_SOURCE) $(DEBUG_CFLAGS) $(MINIZ_CFLAGS) $(LIBXML2_CFLAGS) diff --git a/src/buffer.c b/src/buffer.c index 6d86244..805d35d 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,191 +1,613 @@ -// -// buffer.c -// mobi -// -// Created by Bartek on 27.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. 
-// - -#include -#include "buffer.h" +/** @file buffer.c + * @brief Functions to read/write raw big endian data + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See <http://www.gnu.org/licenses/> + */ -#define MAX_BUFFER_SIZE 4096 +#include <stdlib.h> +#include <string.h> +#include "buffer.h" +#include "debug.h" -MOBIBuffer * buffer_init(size_t len) { - MOBIBuffer *p = NULL; - p = malloc(sizeof(MOBIBuffer)); - if (p == NULL) { - printf("Buffer allocation failed\n"); +/** + @brief Initializer for MOBIBuffer structure + + It allocates memory for structure and for data. + Memory should be freed with buffer_free(). + + @param[in] len Size of data to be allocated for the buffer + @return MOBIBuffer on success, NULL otherwise + */ +MOBIBuffer * buffer_init(const size_t len) { + MOBIBuffer *buf = NULL; + buf = malloc(sizeof(MOBIBuffer)); + if (buf == NULL) { + debug_print("%s", "Buffer allocation failed\n"); return NULL; } - p->data = malloc(len); - if (p->data == NULL) { - free(p); - printf("Buffer data allocation failed\n"); + buf->data = malloc(len); + if (buf->data == NULL) { + free(buf); + debug_print("%s", "Buffer data allocation failed\n"); return NULL; } - p->offset = 0; - p->maxlen = len; - return p; + buf->offset = 0; + buf->maxlen = len; + buf->error = MOBI_SUCCESS; + return buf; } +/** + @brief Initializer for MOBIBuffer structure + + It allocates memory for structure but, unlike buffer_init(), it does not allocate memory for data. + Memory should be freed with buffer_free_null(). 
+ + @param[in] len Size of data held by the buffer + @return MOBIBuffer on success, NULL otherwise + */ +MOBIBuffer * buffer_init_null(const size_t len) { + MOBIBuffer *buf = NULL; + buf = malloc(sizeof(MOBIBuffer)); + if (buf == NULL) { + debug_print("%s", "Buffer allocation failed\n"); + return NULL; + } + buf->data = NULL; + buf->offset = 0; + buf->maxlen = len; + buf->error = MOBI_SUCCESS; + return buf; +} -void buffer_add8(MOBIBuffer *p, uint8_t data) { - if (p->offset + 1 > p->maxlen) { - printf("Buffer full\n"); +/** + @brief Adds 8-bit value to MOBIBuffer + + @param[in,out] buf MOBIBuffer structure to be filled with data + @param[in] data Integer to be put into the buffer + */ +void buffer_add8(MOBIBuffer *buf, const uint8_t data) { + if (buf->offset + 1 > buf->maxlen) { + debug_print("%s", "Buffer full\n"); + buf->error = MOBI_BUFFER_END; return; } - p->data[p->offset++] = data; + buf->data[buf->offset++] = data; } -void buffer_add16(MOBIBuffer *p, uint16_t data) { - if (p->offset + 2 > p->maxlen) { - printf("Buffer full\n"); +/** + @brief Adds 16-bit value to MOBIBuffer + + @param[in,out] buf MOBIBuffer structure to be filled with data + @param[in] data Integer to be put into the buffer + */ +void buffer_add16(MOBIBuffer *buf, const uint16_t data) { + if (buf->offset + 2 > buf->maxlen) { + debug_print("%s", "Buffer full\n"); + buf->error = MOBI_BUFFER_END; return; } - p->data[p->offset++] = (data & 0xff00) >> 8; - p->data[p->offset++] = (data & 0xff); + unsigned char *buftr = buf->data + buf->offset; + *buftr++ = (uint8_t)((uint32_t)(data & 0xff00U) >> 8); + *buftr = (uint8_t)((uint32_t)(data & 0xffU)); + buf->offset += 2; } -void buffer_add32(MOBIBuffer *p, uint32_t data) { - if (p->offset + 4 > p->maxlen) { - printf("Buffer full\n"); +/** + @brief Adds 32-bit value to MOBIBuffer + + @param[in,out] buf MOBIBuffer structure to be filled with data + @param[in] data Integer to be put into the buffer + */ +void buffer_add32(MOBIBuffer *buf, const uint32_t 
data) { + if (buf->offset + 4 > buf->maxlen) { + debug_print("%s", "Buffer full\n"); + buf->error = MOBI_BUFFER_END; return; } - p->data[p->offset++] = (data & 0xff000000) >> 16; - p->data[p->offset++] = (data & 0xff0000) >> 12; - p->data[p->offset++] = (data & 0xff00) >> 8; - p->data[p->offset++] = (data & 0xff); + unsigned char *buftr = buf->data + buf->offset; + *buftr++ = (uint8_t)((uint32_t)(data & 0xff000000U) >> 24); + *buftr++ = (uint8_t)((uint32_t)(data & 0xff0000U) >> 16); + *buftr++ = (uint8_t)((uint32_t)(data & 0xff00U) >> 8); + *buftr = (uint8_t)((uint32_t)(data & 0xffU)); + buf->offset += 4; } -void buffer_addraw(MOBIBuffer *p, char* buf, size_t len) { - if (p->offset + len > p->maxlen) { - printf("Buffer full\n"); +/** + @brief Adds raw data to MOBIBuffer + + @param[in,out] buf MOBIBuffer structure to be filled with data + @param[in] data Pointer to read data + @param[in] len Size of the read data + */ +void buffer_addraw(MOBIBuffer *buf, const unsigned char* data, const size_t len) { + if (buf->offset + len > buf->maxlen) { + debug_print("%s", "Buffer full\n"); + buf->error = MOBI_BUFFER_END; return; } - memcpy(p->data + p->offset, buf, len); - p->offset += len; + memcpy(buf->data + buf->offset, data, len); + buf->offset += len; } -void buffer_addstring(MOBIBuffer *p, char *str) { - size_t len; - len = strlen(str); - buffer_addraw(p, str, len); +/** + @brief Adds string to MOBIBuffer without null terminator + + @param[in,out] buf MOBIBuffer structure to be filled with data + @param[in] str Pointer to string + */ +void buffer_addstring(MOBIBuffer *buf, const char *str) { + const size_t len = strlen(str); + buffer_addraw(buf, (const unsigned char *) str, len); } -void buffer_addzeros(MOBIBuffer *p, size_t count) { - if (p->offset + count > p->maxlen) { - printf("Buffer full\n"); +/** + @brief Adds count of zeroes to MOBIBuffer + + @param[in,out] buf MOBIBuffer structure to be filled with data + @param[in] count Number of zeroes to be put into the buffer + */ 
+void buffer_addzeros(MOBIBuffer *buf, const size_t count) { + if (buf->offset + count > buf->maxlen) { + debug_print("%s", "Buffer full\n"); + buf->error = MOBI_BUFFER_END; return; } - memset(p->data + p->offset, 0, count); - p->offset += count; + memset(buf->data + buf->offset, 0, count); + buf->offset += count; } -uint8_t buffer_get8(MOBIBuffer *p) { - if (p->offset + 1 > p->maxlen) { - printf("End of buffer\n"); +/** + @brief Reads 8-bit value from MOBIBuffer + + @param[in] buf MOBIBuffer structure containing data + @return Read value, 0 if end of buffer is encountered + */ +uint8_t buffer_get8(MOBIBuffer *buf) { + if (buf->offset + 1 > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; return 0; } - return (uint8_t) p->data[p->offset++]; + return buf->data[buf->offset++]; } -uint16_t buffer_get16(MOBIBuffer *p) { - if (p->offset + 2 > p->maxlen) { - printf("End of buffer\n"); +/** + @brief Reads 16-bit value from MOBIBuffer + + @param[in] buf MOBIBuffer structure containing data + @return Read value, 0 if end of buffer is encountered + */ +uint16_t buffer_get16(MOBIBuffer *buf) { + if (buf->offset + 2 > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; return 0; } uint16_t val; - val = (uint8_t) p->data[p->offset] << 8 | (uint8_t) p->data[p->offset + 1]; - p->offset += 2; + val = (uint16_t)((uint16_t) buf->data[buf->offset] << 8 | (uint16_t) buf->data[buf->offset + 1]); + buf->offset += 2; return val; } -uint32_t buffer_get32(MOBIBuffer *p) { - if (p->offset + 4 > p->maxlen) { - printf("End of buffer\n"); +/** + @brief Reads 32-bit value from MOBIBuffer + + @param[in] buf MOBIBuffer structure containing data + @return Read value, 0 if end of buffer is encountered + */ +uint32_t buffer_get32(MOBIBuffer *buf) { + if (buf->offset + 4 > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; return 0; } uint32_t val; - val = (uint8_t) p->data[p->offset] << 24 | 
(uint8_t) p->data[p->offset + 1] << 16 | (uint8_t) p->data[p->offset + 2] << 8 | (uint8_t) p->data[p->offset + 3]; - p->offset += 4; + val = (uint32_t) buf->data[buf->offset] << 24 | (uint32_t) buf->data[buf->offset + 1] << 16 | (uint32_t) buf->data[buf->offset + 2] << 8 | (uint32_t) buf->data[buf->offset + 3]; + buf->offset += 4; + return val; +} + +/** + @brief Reads variable length value from MOBIBuffer + + Internal function for wrappers: + buffer_get_varlen(); + buffer_get_varlen_dec(); + + Reads maximum 4 bytes from the buffer. Stops when byte has bit 7 set. + + @param[in] buf MOBIBuffer structure containing data + @param[out] len Value will be increased by number of bytes read + @param[in] direction 1 - read buffer forward, -1 - read buffer backwards + @return Read value, 0 if end of buffer is encountered + */ +static uint32_t _buffer_get_varlen(MOBIBuffer *buf, size_t *len, const int direction) { + uint32_t val = 0; + uint8_t byte_count = 0; + uint8_t byte; + uint8_t stop_flag = 0x80U; + uint8_t mask = 0x7fU; + do { + if (direction == 1) { + if (buf->offset + 1 > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; + return val; + } + byte = buf->data[buf->offset++]; + } else { + if (buf->offset < 1) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; + return val; + } + byte = buf->data[buf->offset--]; + } + val <<= 7; + val |= (byte & mask); + (*len)++; + byte_count++; + } while (!(byte & stop_flag) && (byte_count < 4)); return val; } -void buffer_getstring(char *str, MOBIBuffer *p, size_t len) { - if (p->offset + len > p->maxlen) { - printf("End of buffer\n"); +/** + @brief Reads variable length value from MOBIBuffer + + Reads maximum 4 bytes from the buffer. Stops when byte has bit 7 set. 
+ + @param[in] buf MOBIBuffer structure containing data + @param[out] len Value will be increased by number of bytes read + @return Read value, 0 if end of buffer is encountered + */ +uint32_t buffer_get_varlen(MOBIBuffer *buf, size_t *len) { + return _buffer_get_varlen(buf, len, 1); +} + +/** + @brief Reads variable length value from MOBIBuffer going backwards + + Reads maximum 4 bytes from the buffer. Stops when byte has bit 7 set. + + @param[in] buf MOBIBuffer structure containing data + @param[out] len Value will be increased by number of bytes read + @return Read value, 0 if end of buffer is encountered + */ +uint32_t buffer_get_varlen_dec(MOBIBuffer *buf, size_t *len) { + return _buffer_get_varlen(buf, len, -1); +} + +/** + @brief Reads raw data from MOBIBuffer and pads it with zero character + + @param[out] str Destination for string read from buffer. Length must be (len + 1) + @param[in] buf MOBIBuffer structure containing data + @param[in] len Length of the data to be read from buffer + */ +void buffer_getstring(char *str, MOBIBuffer *buf, const size_t len) { + if (!str) { + buf->error = MOBI_PARAM_ERR; + return; + } + if (buf->offset + len > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; + return; + } + memcpy(str, buf->data + buf->offset, len); + str[len] = '\0'; + buf->offset += len; +} + +/** + @brief Reads raw data from MOBIBuffer, appends it to a string and pads it with zero character + + @param[in,out] str A string to which data will be appended + @param[in] buf MOBIBuffer structure containing data + @param[in] len Length of the data to be read from buffer + */ +void buffer_appendstring(char *str, MOBIBuffer *buf, const size_t len) { + if (!str) { + buf->error = MOBI_PARAM_ERR; return; } - strncpy(str, p->data + p->offset, len); - p->offset += len; + if (buf->offset + len > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; + return; + } + size_t str_len = strlen(str); + 
memcpy(str + str_len, buf->data + buf->offset, len); + str[str_len + len] = '\0'; + buf->offset += len; } -void buffer_getraw(void *ptr, MOBIBuffer *p, size_t len) { - if (p->offset + len > p->maxlen) { - printf("End of buffer\n"); +/** + @brief Reads raw data from MOBIBuffer + + @param[out] data Destination to which data will be copied + @param[in] buf MOBIBuffer structure containing data + @param[in] len Length of the data to be read from buffer + */ +void buffer_getraw(void *data, MOBIBuffer *buf, const size_t len) { + if (!data) { + buf->error = MOBI_PARAM_ERR; + return; + } + if (buf->offset + len > buf->maxlen) { + debug_print("%s", "End of buffer\n"); + buf->error = MOBI_BUFFER_END; return; } - memcpy(ptr, p->data + p->offset, len); - p->offset += len; + memcpy(data, buf->data + buf->offset, len); + buf->offset += len; } -void buffer_copy8(uint8_t **val, MOBIBuffer *p) { +/** + @brief Read 8-bit value from MOBIBuffer into allocated memory + + Read 8-bit value from buffer into memory allocated by the function. + Returns pointer to the value, which must be freed later. + If the data is not accessible function will return null pointer. + + @param[out] val Pointer to value or null pointer on failure + @param[in] buf MOBIBuffer structure containing data + */ +void buffer_dup8(uint8_t **val, MOBIBuffer *buf) { *val = NULL; - if (p->offset + 1 > p->maxlen) { + if (buf->offset + 1 > buf->maxlen) { return; } *val = malloc(sizeof(uint8_t)); if (*val == NULL) { return; } - **val = (uint8_t) p->data[p->offset++]; + **val = buffer_get8(buf); } -void buffer_copy16(uint16_t **val, MOBIBuffer *p) { +/** + @brief Read 16-bit value from MOBIBuffer into allocated memory + + Read 16-bit value from buffer into allocated memory. + Returns pointer to the value, which must be freed later. + If the data is not accessible function will return null pointer. 
+ + @param[out] val Pointer to value or null pointer on failure + @param[in] buf MOBIBuffer structure containing data + */ +void buffer_dup16(uint16_t **val, MOBIBuffer *buf) { *val = NULL; - if (p->offset + 2 > p->maxlen) { + if (buf->offset + 2 > buf->maxlen) { return; } *val = malloc(sizeof(uint16_t)); if (*val == NULL) { return; } - **val = (uint8_t) p->data[p->offset] << 8 | (uint8_t) p->data[p->offset + 1]; - p->offset += 2; + **val = buffer_get16(buf); } -void buffer_copy32(uint32_t **val, MOBIBuffer *p) { +/** + @brief Read 32-bit value from MOBIBuffer into allocated memory + + Read 32-bit value from buffer into allocated memory. + Returns pointer to the value, which must be freed later. + If the data is not accessible function will return null pointer. + + @param[out] val Pointer to value + @param[in] buf MOBIBuffer structure containing data + */ +void buffer_dup32(uint32_t **val, MOBIBuffer *buf) { *val = NULL; - if (p->offset + 4 > p->maxlen) { + if (buf->offset + 4 > buf->maxlen) { return; } *val = malloc(sizeof(uint32_t)); if (*val == NULL) { return; } - **val = (uint8_t) p->data[p->offset] << 24 | (uint8_t) p->data[p->offset + 1] << 16 | (uint8_t) p->data[p->offset + 2] << 8 | (uint8_t) p->data[p->offset + 3]; - p->offset += 4; + **val = buffer_get32(buf); +} + +/** + @brief Copy 8-bit value from one MOBIBuffer into another + + @param[out] dest Destination buffer + @param[in] source Source buffer + */ +void buffer_copy8(MOBIBuffer *dest, MOBIBuffer *source) { + buffer_add8(dest, buffer_get8(source)); +} + +/** + @brief Copy raw value from one MOBIBuffer into another + + @param[out] dest Destination buffer + @param[in] source Source buffer + @param[in] len Number of bytes to copy + */ +void buffer_copy(MOBIBuffer *dest, MOBIBuffer *source, const size_t len) { + if (source->offset + len > source->maxlen) { + debug_print("%s", "End of buffer\n"); + source->error = MOBI_BUFFER_END; + return; + } + if (dest->offset + len > dest->maxlen) { + 
debug_print("%s", "End of buffer\n"); + dest->error = MOBI_BUFFER_END; + return; + } + memcpy(dest->data + dest->offset, source->data + source->offset, len); + dest->offset += len; + source->offset += len; } -int is_littleendian() { - volatile uint32_t i = 1; - return (*((uint8_t*)(&i))) == 1; +bool buffer_match_magic(MOBIBuffer *buf, const char *magic) { + const size_t magic_length = strlen(magic); + if (buf->offset + magic_length > buf->maxlen) { + return false; + } + if (memcmp(buf->data + buf->offset, magic, magic_length) == 0) { + return true; + } + return false; +} + +/** + @brief Free pointer to MOBIBuffer structure and pointer to data + + Free data initialized with buffer_init(); + + @param[in] buf MOBIBuffer structure + */ +void buffer_free(MOBIBuffer *buf) { + if (buf == NULL) { return; } + if (buf->data != NULL) { + free(buf->data); + } + free(buf); } -uint32_t endian_swap32(uint32_t x) { - return - (x & 0xff) << 24 | - (x & 0xff00) << 8 | - (x & 0xff0000) >> 8 | - (x & 0xff000000) >> 24; +/** + @brief Free pointer to MOBIBuffer structure + + Free data initialized with buffer_init_null(); + Unlike buffer_free() it will not free pointer to buf->data + + @param[in] buf MOBIBuffer structure + */ +void buffer_free_null(MOBIBuffer *buf) { + if (buf == NULL) { return; } + free(buf); } -void buffer_free(MOBIBuffer *p) { - if (p == NULL) return; - - if (p->data != NULL) { - free(p->data); + + +/** + @brief Initializer for MOBIArray structure + + It allocates memory for structure and for data: array of size_t variables. + Memory should be freed with array_free(). 
+ + @param[in] len Initial size of the array + @return MOBIArray on success, NULL otherwise + */ +MOBIArray * array_init(const size_t len) { + MOBIArray *arr = NULL; + arr = malloc(sizeof(MOBIArray)); + if (arr == NULL) { + debug_print("%s", "Array allocation failed\n"); + return NULL; + } + arr->data = malloc(len * sizeof(size_t)); + if (arr->data == NULL) { + free(arr); + debug_print("%s", "Array data allocation failed\n"); + return NULL; } - free(p); + arr->maxsize = arr->step = len; + arr->size = 0; + return arr; +} + +/** + @brief Insert value into MOBIArray array + + Value is stored after the last used element. + When array is full, it is first enlarged by arr->step elements. + + @param[in,out] arr MOBIArray array + @param[in] value Value to be inserted + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET array_insert(MOBIArray *arr, const uint32_t value) { + if (!arr || arr->maxsize == 0) { + return MOBI_INIT_FAILED; + } + if (arr->maxsize == arr->size) { + arr->maxsize += arr->step; + uint32_t *tmp = realloc(arr->data, arr->maxsize * sizeof(size_t)); + if (!tmp) { + free(arr->data); + arr->data = NULL; + return MOBI_MALLOC_FAILED; + } + arr->data = tmp; + } + arr->data[arr->size] = value; + arr->size++; + return MOBI_SUCCESS; +} + +/** + @brief Helper for qsort in array_sort() function. + + @param[in] a First element to compare + @param[in] b Second element to compare + @return -1 if a < b; 1 if a > b; 0 if a = b + */ +static int array_compare(const void *a, const void *b) { + if (*(size_t *) a < *(size_t *) b) { + return -1; + }; + if (*(size_t *) a > *(size_t *) b) { + return 1; + }; + return 0; +} + +/** + @brief Sort MOBIArray in ascending order. + + When unique is set to true, duplicate values are discarded. 
+ + @param[in,out] arr MOBIArray array + @param[in] unique Discard duplicate values if true + */ +void array_sort(MOBIArray *arr, const bool unique) { + if (!arr || !arr->data || arr->size == 0) { + return; + } + qsort(arr->data, arr->size, sizeof(size_t), array_compare); + if (unique) { + size_t i = 1, j = 1; + while (i < arr->size) { + if (arr->data[j - 1] == arr->data[i]) { + i++; + continue; + } + arr->data[j++] = arr->data[i++]; + } + arr->size = j; + } +} + +/** + @brief Get size of the array + + @param[in] arr MOBIArray structure + */ +size_t array_size(MOBIArray *arr) { + return arr->size; +} + +/** + @brief Free MOBIArray structure and contained data + + Free data initialized with array_init(); + + @param[in] arr MOBIArray structure + */ +void array_free(MOBIArray *arr) { + if (!arr) { return; } + if (arr->data) { + free(arr->data); + } + free(arr); } diff --git a/src/buffer.h b/src/buffer.h index 88b78cf..276e6a7 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -1,42 +1,68 @@ -// -// buffer.h -// mobi -// -// Created by Bartek on 27.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file buffer.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ -#ifndef mobi_buffer_h -#define mobi_buffer_h - -#include -#include +#ifndef libmobi_buffer_h +#define libmobi_buffer_h +#include "config.h" #include "mobi.h" +/** + @brief Buffer to read from/write to + */ typedef struct { - char *data; - size_t offset; - size_t maxlen; + size_t offset; /**< Current offset with respect to buffer start */ + size_t maxlen; /**< Length of the buffer data */ + unsigned char *data; /**< Pointer to buffer data */ + MOBI_RET error; /**< MOBI_SUCCESS = 0 if operation on buffer is successful, non-zero value on failure */ } MOBIBuffer; -MOBIBuffer * buffer_init(size_t len); -void buffer_add8(MOBIBuffer *p, uint8_t data); -void buffer_add16(MOBIBuffer *p, uint16_t data); -void buffer_add32(MOBIBuffer *p, uint32_t data); -void buffer_addraw(MOBIBuffer *p, char* buf, size_t len); -void buffer_addstring(MOBIBuffer *p, char *str); -void buffer_addzeros(MOBIBuffer *p, size_t count); -uint8_t buffer_get8(MOBIBuffer *p); -uint16_t buffer_get16(MOBIBuffer *p); -uint32_t buffer_get32(MOBIBuffer *p); -void buffer_copy8(uint8_t **val, MOBIBuffer *p); -void buffer_copy16(uint16_t **val, MOBIBuffer *p); -void buffer_copy32(uint32_t **val, MOBIBuffer *p); -void buffer_getstring(char *str, MOBIBuffer *p, size_t len); -void buffer_getraw(void *ptr, MOBIBuffer *p, size_t len); -void buffer_free(MOBIBuffer *p); -int is_littleendian(); -uint32_t endian_swap32(uint32_t x); +MOBIBuffer * buffer_init(const size_t len); +MOBIBuffer * buffer_init_null(const size_t len); +void buffer_add8(MOBIBuffer *buf, const uint8_t data); +void buffer_add16(MOBIBuffer *buf, const uint16_t data); +void buffer_add32(MOBIBuffer *buf, const uint32_t data); +void buffer_addraw(MOBIBuffer *buf, const unsigned char* data, const size_t len); +void buffer_addstring(MOBIBuffer *buf, const char *str); +void buffer_addzeros(MOBIBuffer *buf, const size_t count); +uint8_t buffer_get8(MOBIBuffer *buf); +uint16_t buffer_get16(MOBIBuffer *buf); +uint32_t buffer_get32(MOBIBuffer *buf); 
+uint32_t buffer_get_varlen(MOBIBuffer *buf, size_t *len); +uint32_t buffer_get_varlen_dec(MOBIBuffer *buf, size_t *len); +void buffer_dup8(uint8_t **val, MOBIBuffer *buf); +void buffer_dup16(uint16_t **val, MOBIBuffer *buf); +void buffer_dup32(uint32_t **val, MOBIBuffer *buf); +void buffer_getstring(char *str, MOBIBuffer *buf, const size_t len); +void buffer_appendstring(char *str, MOBIBuffer *buf, const size_t len); +void buffer_getraw(void *data, MOBIBuffer *buf, const size_t len); +void buffer_copy8(MOBIBuffer *in, MOBIBuffer *source); +void buffer_copy(MOBIBuffer *dest, MOBIBuffer *source, size_t len); +bool buffer_match_magic(MOBIBuffer *buf, const char *magic); +void buffer_free(MOBIBuffer *buf); +void buffer_free_null(MOBIBuffer *buf); + +/** + @brief Dynamic array of uint32_t values structure + */ +typedef struct { + uint32_t *data; /**< Array */ + size_t maxsize; /**< Allocated size */ + size_t step; /**< Step by which array will be enlarged if out of memory */ + size_t size; /**< Current size */ +} MOBIArray; + +MOBIArray * array_init(const size_t len); +MOBI_RET array_insert(MOBIArray *arr, uint32_t value); +void array_sort(MOBIArray *arr, bool unique); +size_t array_size(MOBIArray *arr); +void array_free(MOBIArray *arr); #endif diff --git a/src/compression.c b/src/compression.c index a50f6e2..b275970 100644 --- a/src/compression.c +++ b/src/compression.c @@ -1,109 +1,147 @@ -// -// compression.c -// mobi -// -// Created by Bartek on 27.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file compression.c + * @brief Functions handling compression + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ #include -#include - #include "compression.h" +#include "buffer.h" #include "mobi.h" +#include "debug.h" + + +/** + @brief Decompressor for PalmDOC version of LZ77 compression + Decompressor based on this algorithm: + http://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#PalmDoc -// PalmDOC version of LZ77 compression -// Decompressor based on this algorithm: -// http://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#PalmDoc -// -size_t mobi_decompress_lz77(char *out, const char *in, size_t len) { - size_t start_in = (size_t) in; - size_t start_out = (size_t) out; - while ((size_t) in - start_in < len) { - uint8_t val = (uint8_t) in[0]; - // byte pair: space + char - if (val >= 0xc0) { - *(out++) = ' '; - *(out++) = val ^ 0x80; - in++; + @param[out] out Decompressed destination data + @param[in] in Compressed source data + @param[in,out] len_out Size of the memory reserved for decompressed data. + On return it is set to actual size of decompressed data + @param[in] len_in Size of compressed data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_decompress_lz77(unsigned char *out, const unsigned char *in, size_t *len_out, const size_t len_in) { + MOBI_RET ret = MOBI_SUCCESS; + MOBIBuffer *buf_in = buffer_init_null(len_in); + if (buf_in == NULL) { + return MOBI_MALLOC_FAILED; + } + MOBIBuffer *buf_out = buffer_init_null(*len_out); + if (buf_out == NULL) { + buffer_free_null(buf_in); + return MOBI_MALLOC_FAILED; + } + /* FIXME: is it ok to cast const to non-const here */ + /* or is there a better way? 
*/ + buf_in->data = (unsigned char *) in; + buf_out->data = out; + while (ret == MOBI_SUCCESS && buf_in->offset < buf_in->maxlen) { + uint8_t byte = buffer_get8(buf_in); + /* byte pair: space + char */ + if (byte >= 0xc0) { + buffer_add8(buf_out, ' '); + buffer_add8(buf_out, byte ^ 0x80); } - // length, distance pair - // 0x8000 + (distance << 3) + ((length-3) & 0x07) - else if (val >= 0x80) { - uint16_t distance = ((((in[0] << 8) | ((uint8_t)in[1])) >> 3) & 0x7ff); - uint8_t length = (in[1] & 0x7) + 3; - while (length-- > 0) { - *(out) = *(out - distance); - out++; + /* length, distance pair */ + /* 0x8000 + (distance << 3) + ((length-3) & 0x07) */ + else if (byte >= 0x80) { + uint8_t next = buffer_get8(buf_in); + uint16_t distance = ((((byte << 8) | ((uint8_t)next)) >> 3) & 0x7ff); + uint8_t length = (next & 0x7) + 3; + while (length--) { + buffer_add8(buf_out, *(buf_out->data + buf_out->offset - distance)); } - in += 2; } - // single char, not modified - else if (val >= 0x09) { - *(out++) = *(in++); + /* single char, not modified */ + else if (byte >= 0x09) { + buffer_add8(buf_out, byte); } - // n chars not modified - else if (val >= 0x01) { - memcpy(out, ++in, val); - out += val; - in += val; + /* val chars not modified */ + else if (byte >= 0x01) { + buffer_copy(buf_out, buf_in, byte); } - // char '\0', not modified + /* char '\0', not modified */ else { - *(out++) = *(in++); + buffer_copy8(buf_out, buf_in); + } + if (buf_in->error || buf_out->error) { + ret = MOBI_BUFFER_END; } } - return (size_t) out - start_out; + *len_out = buf_out->offset; + buffer_free_null(buf_out); + buffer_free_null(buf_in); + return ret; } -uint64_t _fill_buffer(const char *in, size_t len) { - uint32_t in1 = 0L; - uint32_t in2 = 0L; - len = (len < 8) ? 
len : 8; - size_t i = 0; - while (i < len && i < 4) { - in1 |= (uint8_t) in[i] << ((3-i) * 8); - i++; +/** + @brief Read at most 8 bytes from buffer, big-endian + + If buffer data is shorter returned value is padded with zeroes + + @param[in] buf MOBIBuffer structure to read from + @return 64-bit value + */ +static MOBI_INLINE uint64_t buffer_fill64(MOBIBuffer *buf) { + uint64_t val = 0; + uint8_t i = 8; + size_t bytesleft = buf->maxlen - buf->offset; + unsigned char *ptr = buf->data + buf->offset; + while (i-- && bytesleft--) { + val |= (uint64_t) *ptr++ << (i * 8); } - while (i < len) { - in2 |= (uint8_t) in[i] << ((3-i) * 8); - i++; - } - return (uint64_t) in1 << 32 | in2; + /* increase counter by 4 bytes only, 4 bytes overlap on each call */ + buf->offset += 4; + return val; } -int shortcnt = 0; - -// Mobi version of Huffman coding -// Decompressor and HUFF/CDIC records parsing based on: -// perl EBook::Tools::Mobipocket -// python mobiunpack.py, calibre -size_t mobi_decompress_huffman(char *out, const char *in, size_t len, MOBIHuffCdic *huffcdic, size_t depth) { - size_t start_out = (size_t) out; +/** + @brief Internal function for huff/cdic decompression + + Decompressor and HUFF/CDIC records parsing based on: + perl EBook::Tools::Mobipocket + python mobiunpack.py, calibre + + @param[out] buf_out MOBIBuffer structure with decompressed data + @param[in] buf_in MOBIBuffer structure with compressed data + @param[in] huffcdic MOBIHuffCdic structure with parsed data from huff/cdic records + @param[in] depth Depth of current recursion level + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +static MOBI_RET mobi_decompress_huffman_internal(MOBIBuffer *buf_out, MOBIBuffer *buf_in, const MOBIHuffCdic *huffcdic, size_t depth) { + if (depth > MOBI_HUFFMAN_MAXDEPTH) { + debug_print("Too many levels of recursion: %zu\n", depth); + return MOBI_DATA_CORRUPT; + } + MOBI_RET ret = MOBI_SUCCESS; int8_t bitcount = 32; - int32_t bitsleft = (int32_t) len * 8; - 
uint32_t t1, offset; - uint32_t code, maxcode, symbol_length; - uint8_t code_length = 0, i; - uint32_t index; - uint64_t buffer; - buffer = _fill_buffer(in, len); - while (1) { + /* this cast should be safe: max record size is 4096 */ + int bitsleft = (int) (buf_in->maxlen * 8); + uint8_t code_length = 0; + uint64_t buffer = buffer_fill64(buf_in); + while (ret == MOBI_SUCCESS) { if (bitcount <= 0) { bitcount += 32; - in += 4; - buffer = _fill_buffer(in, (bitsleft + (8 - 1)) / 8); + buffer = buffer_fill64(buf_in); } - code = (buffer >> bitcount) & 0xffffffff; - // lookup code in table1 - t1 = huffcdic->table1[code >> 24]; - // get maxcode and codelen from t1 + uint32_t code = (buffer >> bitcount) & 0xffffffffU; + /* lookup code in table1 */ + uint32_t t1 = huffcdic->table1[code >> 24]; + /* get maxcode and codelen from t1 */ code_length = t1 & 0x1f; - maxcode = (((t1 >> 8) + 1) << (32 - code_length)) - 1; - // check termination bit + uint32_t maxcode = (((t1 >> 8) + 1) << (32 - code_length)) - 1; + /* check termination bit */ if (!(t1 & 0x80)) { - // get offset from mincode, maxcode tables + /* get offset from mincode, maxcode tables */ while (code < huffcdic->mincode_table[code_length]) { code_length++; } @@ -114,26 +152,67 @@ size_t mobi_decompress_huffman(char *out, const char *in, size_t len, MOBIHuffCd if (bitsleft < 0) { break; } - // get index for symbol offset - index = (maxcode - code) >> (32 - code_length); - // check which part of cdic to use - i = index >> huffcdic->code_length; - // get offset - offset = huffcdic->symbol_offsets[index]; - symbol_length = (uint8_t) huffcdic->symbols[i][offset] << 8 | (uint8_t) huffcdic->symbols[i][offset + 1]; - // 1st bit is is_decompressed flag + /* get index for symbol offset */ + uint32_t index = (uint32_t) (maxcode - code) >> (32 - code_length); + /* check which part of cdic to use */ + uint8_t cdic_index = (uint8_t) ((uint32_t)index >> huffcdic->code_length); + /* get offset */ + uint32_t offset = 
huffcdic->symbol_offsets[index]; + uint32_t symbol_length = (uint32_t) huffcdic->symbols[cdic_index][offset] << 8 | (uint32_t) huffcdic->symbols[cdic_index][offset + 1]; + /* 1st bit is is_decompressed flag */ int is_decompressed = symbol_length >> 15; - // get rid of flag + /* get rid of flag */ symbol_length &= 0x7fff; if (is_decompressed) { - memcpy(out, (huffcdic->symbols[i] + offset + 2), symbol_length); - out += symbol_length; + /* symbol is at (offset + 2), 2 bytes used earlier for symbol length */ + buffer_addraw(buf_out, (huffcdic->symbols[cdic_index] + offset + 2), symbol_length); + ret = buf_out->error; } else { - // symbol is compressed - // TODO cache uncompressed symbols? - out += mobi_decompress_huffman(out, (huffcdic->symbols[i] + offset + 2), (symbol_length), huffcdic, depth + 1); + /* symbol is compressed */ + /* TODO cache uncompressed symbols? */ + MOBIBuffer buf_sym; + buf_sym.data = huffcdic->symbols[cdic_index] + offset + 2; + buf_sym.offset = 0; + buf_sym.maxlen = symbol_length; + buf_sym.error = MOBI_SUCCESS; + ret = mobi_decompress_huffman_internal(buf_out, &buf_sym, huffcdic, depth + 1); } } - return (size_t) out - start_out; + return ret; +} +/** + @brief Decompressor for huff/cdic compressed text records + + Decompressor and HUFF/CDIC records parsing based on: + perl EBook::Tools::Mobipocket + python mobiunpack.py, calibre + + @param[out] out Decompressed destination data + @param[in] in Compressed source data + @param[in,out] len_out Size of the memory reserved for decompressed data. 
+ On return it is set to actual size of decompressed data + @param[in] len_in Size of compressed data + @param[in] huffcdic MOBIHuffCdic structure with parsed data from huff/cdic records + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_decompress_huffman(unsigned char *out, const unsigned char *in, size_t *len_out, size_t len_in, const MOBIHuffCdic *huffcdic) { + MOBIBuffer *buf_in = buffer_init_null(len_in); + if (buf_in == NULL) { + return MOBI_MALLOC_FAILED; + } + MOBIBuffer *buf_out = buffer_init_null(*len_out); + if (buf_out == NULL) { + buffer_free_null(buf_in); + return MOBI_MALLOC_FAILED; + } + /* FIXME: is it ok to cast const to non-const here */ + /* or is there a better way? */ + buf_in->data = (unsigned char *) in; + buf_out->data = out; + MOBI_RET ret = mobi_decompress_huffman_internal(buf_out, buf_in, huffcdic, 0); + *len_out = buf_out->offset; + buffer_free_null(buf_out); + buffer_free_null(buf_in); + return ret; } diff --git a/src/compression.h b/src/compression.h index 3d1678c..be23873 100644 --- a/src/compression.h +++ b/src/compression.h @@ -1,29 +1,27 @@ -// -// compression.h -// mobi -// -// Created by Bartek on 27.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file compression.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ -#ifndef mobi_lz77_h -#define mobi_lz77_h +#ifndef libmobi_compression_h +#define libmobi_compression_h -#include -#include +#include "config.h" +#include "mobi.h" -typedef struct { - size_t index_count; - size_t index_read; - size_t code_length; - uint32_t table1[256]; - uint32_t mincode_table[33]; - uint32_t maxcode_table[33]; - uint16_t *symbol_offsets; - char **symbols; -} MOBIHuffCdic; +#ifndef MOBI_INLINE +#define MOBI_INLINE /**< Syntax for compiler inline keyword from config.h */ +#endif + +/* FIXME: what is the reasonable value? 
*/ +#define MOBI_HUFFMAN_MAXDEPTH 15 /**< Maximal recursion level for huffman decompression routine */ -size_t mobi_decompress_lz77(char *out, const char *in, size_t len); -size_t mobi_decompress_huffman(char *out, const char *in, size_t len, MOBIHuffCdic *huffcdic, size_t depth); +MOBI_RET mobi_decompress_lz77(unsigned char *out, const unsigned char *in, size_t *len_out, const size_t len_in); +MOBI_RET mobi_decompress_huffman(unsigned char *out, const unsigned char *in, size_t *len_out, size_t len_in, const MOBIHuffCdic *huffcdic); #endif diff --git a/src/config.h b/src/config.h new file mode 100644 index 0000000..2d629f1 --- /dev/null +++ b/src/config.h @@ -0,0 +1,16 @@ +/** @file src/config.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ + +#ifndef mobi_config_h +#define mobi_config_h + +#include "../config.h" + +#endif diff --git a/src/debug.c b/src/debug.c index 1767df1..5ba6ad5 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,38 +1,72 @@ -// -// debug.c -// mobi -// -// Created by Bartek on 02.04.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// - -#include "debug.h" +/** @file debug.c + * @brief Debugging functions, enable by running configure --enable-debug + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ #include -#if MOBI_DEBUG -// debug -void debug_free(void *ptr, char *file, int line){ +#include "debug.h" + +/** + @brief Debugging wrapper for free(void *ptr) + + @param[in] ptr Pointer + @param[in] file Calling file + @param[in] line Calling line + */ +void debug_free(void *ptr, const char *file, const int line) { printf("%s:%d: free(%p)\n",file, line, ptr); (free)(ptr); } -void *debug_malloc(size_t size, char *file, int line) { +/** + @brief Debugging wrapper for malloc(size_t size) + + @param[in] size Size of memory + @param[in] file Calling file + @param[in] line Calling line + @return A pointer to the allocated memory block on success, NULL on failure + + */ +void *debug_malloc(const size_t size, const char *file, const int line) { void *ptr = (malloc)(size); printf("%s:%d: malloc(%d)=%p\n", file, line, (int)size, ptr); return ptr; } -void *debug_realloc(void *ptr, size_t size, char *file, int line) { +/** + @brief Debugging wrapper for realloc(void* ptr, size_t size) + + @param[in] ptr Pointer + @param[in] size Size of memory + @param[in] file Calling file + @param[in] line Calling line + @return A pointer to the reallocated memory block on success, NULL on failure + */ +void *debug_realloc(void *ptr, const size_t size, const char *file, const int line) { printf("%s:%d: realloc(%p", file, line, ptr); void *rptr = (realloc)(ptr, size); printf(", %d)=%p\n", (int)size, rptr); return rptr; } -void *debug_calloc(size_t num, size_t size, char *file, int line) { +/** + @brief Debugging wrapper for calloc(size_t num, size_t size) + + @param[in] num Number of elements to allocate + @param[in] size Size of each element + @param[in] file Calling file + @param[in] line Calling line + @return A pointer to the allocated memory block on success, NULL on failure + */ +void *debug_calloc(const size_t num, const size_t size, const char *file, const int line) { void *ptr = (calloc)(num, size); printf("%s:%d: calloc(%d, %d)=%p\n", file, line, (int)num, (int)size, 
ptr); return ptr; } -#endif diff --git a/src/debug.h b/src/debug.h index c10c5e6..4e2a499 100644 --- a/src/debug.h +++ b/src/debug.h @@ -1,27 +1,54 @@ -// -// debug.h -// mobi -// -// Created by Bartek on 02.04.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file debug.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ -#ifndef mobi_debug_h -#define mobi_debug_h +#ifndef libmobi_debug_h +#define libmobi_debug_h -#include +#include "config.h" +#include "mobi.h" -#define MOBI_DEBUG 0 -#if MOBI_DEBUG -#define free(x) debug_free(x,__FILE__,__LINE__) -void debug_free(void *ptr, char *file, int line); -#define malloc(x) debug_malloc(x, __FILE__, __LINE__ ) -void *debug_malloc(size_t size, char *file, int line); -#define realloc(x, y) debug_realloc(x, y, __FILE__, __LINE__ ) -void *debug_realloc(void *ptr, size_t size, char *file, int line); -#define calloc(x, y) debug_calloc(x, y, __FILE__, __LINE__ ) -void *debug_calloc(size_t num, size_t size, char *file, int line); +#ifndef MOBI_DEBUG +#define MOBI_DEBUG 0 /**< Turn on debugging, set this on by running "configure --enable-debug" */ #endif +#if MOBI_DEBUG_ALLOC +/** + @defgroup mobi_debug Debug wrappers for memory allocation functions + + Set this on by running "configure --enable-debug-alloc" + @{ + */ +#define free(x) debug_free(x, __FILE__, __LINE__) +#define malloc(x) debug_malloc(x, __FILE__, __LINE__) +#define realloc(x, y) debug_realloc(x, y, __FILE__, __LINE__) +#define calloc(x, y) debug_calloc(x, y, __FILE__, __LINE__) +/** @} */ +#endif + +void debug_free(void *ptr, const char *file, const int line); +void *debug_malloc(const size_t size, const char *file, const int line); +void *debug_realloc(void *ptr, const size_t size, const char *file, const int line); +void *debug_calloc(const size_t num, const size_t size, const char *file, const int line); + +/** + 
@brief Macro for printing debug info to stderr. Wrapper for fprintf + @param[in] fmt Format + @param[in] ... Additional arguments + */ +#if (MOBI_DEBUG) +#define debug_print(fmt, ...) { \ + fprintf(stderr, "%s:%d:%s(): " fmt, __FILE__, \ + __LINE__, __func__, __VA_ARGS__); \ +} +#else +#define debug_print(fmt, ...) +#endif #endif diff --git a/src/index.c b/src/index.c new file mode 100644 index 0000000..da1995c --- /dev/null +++ b/src/index.c @@ -0,0 +1,402 @@ +/** @file index.c + * @brief Functions to parse index records + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ + +#include +#include +#include + +#include "index.h" +#include "util.h" +#include "memory.h" +#include "debug.h" + +/** + @brief Parser of TAGX section of INDX record + + @param[in,out] buf MOBIBuffer structure, offset pointing at beginning of TAGX section + @param[in,out] tagx MOBITagx structure to be filled by the function + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +static MOBI_RET mobi_parse_tagx(MOBIBuffer *buf, MOBITagx *tagx) { + tagx->control_byte_count = 0; + tagx->tags_count = 0; + tagx->tags = NULL; + buf->offset += 4; /* skip header */ + const uint32_t tagx_header_length = buffer_get32(buf); + if (tagx_header_length < 16) { + debug_print("INDX wrong header length: %u\n", tagx_header_length); + return MOBI_DATA_CORRUPT; + } + tagx->control_byte_count = buffer_get32(buf); + const size_t tagx_data_length = (tagx_header_length - 12) / 4; + tagx->tags = malloc(tagx_header_length * sizeof(TAGXTags)); + if (tagx->tags == NULL) { + debug_print("%s", "Memory allocation failed for TAGX tags\n"); + return MOBI_MALLOC_FAILED; + } + size_t i = 0; + while (i < tagx_data_length) { + tagx->tags[i].tag = buffer_get8(buf); + tagx->tags[i].values_count = buffer_get8(buf); + tagx->tags[i].bitmask = buffer_get8(buf); + const uint8_t control_byte = 
buffer_get8(buf);
+        tagx->tags[i].control_byte = control_byte;
+        debug_print("tagx[%zu]:\t%i\t%i\t%i\t%i\n", i, tagx->tags[i].tag, tagx->tags[i].values_count, tagx->tags[i].bitmask, control_byte);
+        i++;
+    }
+    tagx->tags_count = i;
+    return MOBI_SUCCESS;
+}
+
+/**
+ @brief Parser of IDXT section of INDX record
+
+ The caller must provide idxt->offsets with room for entries_count + 1 elements:
+ one offset per entry plus the end position of the last entry.
+
+ @param[in,out] buf MOBIBuffer structure, offset pointing at beginning of IDXT section
+ @param[in,out] idxt MOBIIdxt structure to be filled by the function
+ @param[in] entries_count Number of index entries
+ @return MOBI_RET status code (on success MOBI_SUCCESS)
+ */
+static MOBI_RET mobi_parse_idxt(MOBIBuffer *buf, MOBIIdxt *idxt, const size_t entries_count) {
+    const uint32_t idxt_offset = (uint32_t) buf->offset;
+    idxt->offsets_count = 0;
+    char idxt_magic[5];
+    buffer_getstring(idxt_magic, buf, 4);
+    if (strncmp(idxt_magic, IDXT_MAGIC, 4) != 0) {
+        debug_print("IDXT wrong magic: %s\n", idxt_magic);
+        return MOBI_DATA_CORRUPT;
+    }
+    size_t i = 0;
+    while (i < entries_count) {
+        /* entry offsets */
+        idxt->offsets[i++] = buffer_get16(buf);
+    }
+    /* last entry end position is IDXT tag offset */
+    idxt->offsets[i] = idxt_offset;
+    idxt->offsets_count = i;
+    return MOBI_SUCCESS;
+}
+
+/**
+ @brief Parser of INDX index entry
+
+ The entry is stored at indx->entries[indx->entries_count + curr_number].
+ On failure the label and tags of that entry are released and set to NULL,
+ and the buffer maxlen is restored to its original value.
+
+ @param[in,out] indx MOBIIndx structure, to be filled with parsed data
+ @param[in] idxt MOBIIdxt structure with parsed IDXT index
+ @param[in] tagx MOBITagx structure with parsed TAGX index
+ @param[in,out] buf MOBIBuffer structure with index data
+ @param[in] curr_number Sequential number of an index entry for current record
+ @return MOBI_RET status code (on success MOBI_SUCCESS)
+ */
+static MOBI_RET mobi_parse_index_entry(MOBIIndx *indx, const MOBIIdxt idxt, const MOBITagx tagx, MOBIBuffer *buf, const size_t curr_number) {
+    if (indx == NULL) {
+        debug_print("%s", "INDX structure not initialized\n");
+        return MOBI_INIT_FAILED;
+    }
+    const size_t entry_offset = indx->entries_count;
+    const size_t entry_number = curr_number + entry_offset;
+    /* indx->entries is allocated with total_entries_count elements */
+    if (indx->entries == NULL || entry_number >= indx->total_entries_count) {
+        debug_print("Entry number out of range: %zu\n", entry_number);
+        return MOBI_DATA_CORRUPT;
+    }
+    /* offsets come from the file, a decreasing pair would wrap the subtraction below */
+    if (idxt.offsets[curr_number + 1] < idxt.offsets[curr_number]) {
+        debug_print("Entry offsets not ascending at entry: %zu\n", curr_number);
+        return MOBI_DATA_CORRUPT;
+    }
+    const size_t entry_length = idxt.offsets[curr_number + 1] - idxt.offsets[curr_number];
+    buf->offset = idxt.offsets[curr_number];
+    /* save original record maxlen */
+    const size_t buf_maxlen = buf->maxlen;
+    if (buf->offset + entry_length > buf_maxlen) {
+        debug_print("Entry length too long: %zu\n", entry_length);
+        return MOBI_DATA_CORRUPT;
+    }
+    MOBIIndexEntry *entry = &indx->entries[entry_number];
+    /* leave the entry in a state that is safe to pass to free() on every path */
+    entry->label = NULL;
+    entry->tags = NULL;
+    entry->tags_count = 0;
+    buf->maxlen = buf->offset + entry_length;
+    const size_t label_length = buffer_get8(buf);
+    if (label_length > entry_length) {
+        debug_print("Label length too long: %zu\n", label_length);
+        buf->maxlen = buf_maxlen;
+        return MOBI_DATA_CORRUPT;
+    }
+    entry->label = malloc(label_length + 1);
+    if (entry->label == NULL) {
+        debug_print("%s", "Memory allocation failed for index entry label\n");
+        buf->maxlen = buf_maxlen;
+        return MOBI_MALLOC_FAILED;
+    }
+    buffer_getstring(entry->label, buf, label_length);
+    debug_print("tag label[%zu]: %s\n", entry_number, entry->label);
+    unsigned char *control_bytes;
+    control_bytes = buf->data + buf->offset;
+    buf->offset += tagx.control_byte_count;
+    if (tagx.tags_count > 0) {
+        typedef struct {
+            uint8_t tag;
+            uint8_t tag_value_count;
+            uint32_t value_count;
+            uint32_t value_bytes;
+        } MOBIPtagx;
+        /* tags_count is derived from file data: keep the scratch table on the heap, not in a VLA */
+        MOBIPtagx *ptagx = calloc(tagx.tags_count, sizeof(MOBIPtagx));
+        entry->tags = calloc(tagx.tags_count, sizeof(MOBIIndexTag));
+        if (ptagx == NULL || entry->tags == NULL) {
+            debug_print("%s", "Memory allocation failed for index entry tags\n");
+            free(ptagx);
+            free(entry->tags);
+            entry->tags = NULL;
+            free(entry->label);
+            entry->label = NULL;
+            buf->maxlen = buf_maxlen;
+            return MOBI_MALLOC_FAILED;
+        }
+        uint32_t ptagx_count = 0;
+        size_t len;
+        size_t i = 0;
+        while (i < tagx.tags_count) {
+            if (tagx.tags[i].control_byte == 1) {
+                control_bytes++;
+                i++;
+                continue;
+            }
+            uint32_t value = control_bytes[0] & tagx.tags[i].bitmask;
+            if (value != 0) {
+                /* FIXME: is it safe to use MOBI_NOTSET? */
+                uint32_t value_count = MOBI_NOTSET;
+                uint32_t value_bytes = MOBI_NOTSET;
+                /* all bits of masked value are set */
+                if (value == tagx.tags[i].bitmask) {
+                    /* more than 1 bit set */
+                    if (mobi_bitcount(tagx.tags[i].bitmask) > 1) {
+                        /* read value bytes from entry */
+                        len = 0;
+                        value_bytes = buffer_get_varlen(buf, &len);
+                    } else {
+                        value_count = 1;
+                    }
+                } else {
+                    /* value != 0 implies bitmask != 0, so this shift loop terminates */
+                    uint8_t mask = tagx.tags[i].bitmask;
+                    while ((mask & 1) == 0) {
+                        mask >>= 1;
+                        value >>= 1;
+                    }
+                    value_count = value;
+                }
+                ptagx[ptagx_count].tag = tagx.tags[i].tag;
+                ptagx[ptagx_count].tag_value_count = tagx.tags[i].values_count;
+                ptagx[ptagx_count].value_count = value_count;
+                ptagx[ptagx_count].value_bytes = value_bytes;
+                ptagx_count++;
+            }
+            i++;
+        }
+        entry->tags_count = ptagx_count;
+        i = 0;
+        while (i < ptagx_count) {
+            uint32_t tagvalues_count = 0;
+            /* FIXME: is it safe to use MOBI_NOTSET? */
+            /* value count is set */
+            if (ptagx[i].value_count != MOBI_NOTSET) {
+                size_t count = ptagx[i].value_count * ptagx[i].tag_value_count;
+                while (count-- && tagvalues_count < MOBI_INDX_MAXTAGVALUES) {
+                    len = 0;
+                    const uint32_t value_bytes = buffer_get_varlen(buf, &len);
+                    entry->tags[i].tagvalues[tagvalues_count] = value_bytes;
+                    tagvalues_count++;
+                }
+            /* value count is not set */
+            } else {
+                /* read value_bytes bytes */
+                len = 0;
+                while (len < ptagx[i].value_bytes && tagvalues_count < MOBI_INDX_MAXTAGVALUES) {
+                    const uint32_t value_bytes = buffer_get_varlen(buf, &len);
+                    entry->tags[i].tagvalues[tagvalues_count] = value_bytes;
+                    tagvalues_count++;
+                }
+            }
+            entry->tags[i].tagid = ptagx[i].tag;
+            entry->tags[i].tagvalues_count = tagvalues_count;
+            i++;
+        }
+        free(ptagx);
+    }
+    /* restore buffer maxlen */
+    buf->maxlen = buf_maxlen;
+    return MOBI_SUCCESS;
+}
+
+/**
+ @brief Parser of INDX record
+
+ @param[in] indx_record MOBIPdbRecord structure with INDX record
+ @param[in,out] indx MOBIIndx
structure to be filled with parsed entries
+ @param[in,out] tagx MOBITagx structure, will be filled with parsed TAGX section data if present in the INDX record,
+ otherwise TAGX data will be used to parse the record
+ @return MOBI_RET status code (on success MOBI_SUCCESS)
+ */
+MOBI_RET mobi_parse_indx(const MOBIPdbRecord *indx_record, MOBIIndx *indx, MOBITagx *tagx) {
+    /* callers walk record->next, which may run off the end of the record list */
+    if (indx_record == NULL || indx == NULL || tagx == NULL) {
+        debug_print("%s", "INDX parser arguments not initialized\n");
+        return MOBI_INIT_FAILED;
+    }
+    MOBI_RET ret;
+    MOBIBuffer *buf = buffer_init_null(indx_record->size);
+    if (buf == NULL) {
+        return MOBI_MALLOC_FAILED;
+    }
+    buf->data = indx_record->data;
+    char indx_magic[5];
+    buffer_getstring(indx_magic, buf, 4); /* 0: INDX magic */
+    const uint32_t header_length = buffer_get32(buf); /* 4: header length */
+    /* header_length is used below as an offset into the record, so it must lie inside it */
+    if (strncmp(indx_magic, INDX_MAGIC, 4) != 0 ||
+        header_length == 0 ||
+        header_length > indx_record->size) {
+        debug_print("INDX wrong magic: %s or header length: %u\n", indx_magic, header_length);
+        buffer_free_null(buf);
+        return MOBI_DATA_CORRUPT;
+    }
+    buf->offset += 4; /* 8: zeroes */
+    /* FIXME: unused */
+    indx->type = buffer_get32(buf); /* 12: 0 - normal, 2 - inflection */
+    /* FIXME: unused */
+    buf->offset += 4; /* 16: gen */
+    const uint32_t idxt_offset = buffer_get32(buf); /* 20: IDXT offset */
+    const size_t entries_count = buffer_get32(buf); /* 24: entries count */
+    indx->encoding = buffer_get32(buf); /* 28: encoding */
+    buf->offset += 4; /* 32: zeroes */
+    const size_t total_entries_count = buffer_get32(buf); /* 36: total entries count */
+    if (indx->total_entries_count == 0) {
+        indx->total_entries_count = total_entries_count;
+    }
+    indx->ordt_offset = buffer_get32(buf); /* 40: ORDT offset */
+    indx->ligt_offset = buffer_get32(buf); /* 44: LIGT offset */
+    indx->ordt_entries_count = buffer_get32(buf); /* 48: ORDT entries count */
+    indx->cncx_records_count = buffer_get32(buf); /* 52: CNCX entries count */
+    //buf->offset += 124; /* 56: unknown */
+    // 164: ocnt
+    // 168: ORDT entries count
+    // 172: ORDT1 offset
+    // 176: ORDT2 offset
+    // 180: otagx
+    //uint32_t tagx_offset = buffer_get32(buf); /* 180: TAGX offset ? */
+    /* FIXME: in dictionaries offset is moved by this value */
+    //tagx_offset += buffer_get32(buf);
+    /* buf->offset += 4; // 184: zeroes */
+    buf->offset = header_length;
+
+    /* TAGX metadata */
+    /* if record contains TAGX section, read it and return */
+    if (buffer_match_magic(buf, TAGX_MAGIC)) {
+        ret = mobi_parse_tagx(buf, tagx);
+        buffer_free_null(buf);
+        indx->entries_count = entries_count;
+        return ret;
+    }
+    /* IDXT entries offsets */
+    if (idxt_offset == 0) {
+        debug_print("%s", "Missing IDXT offset\n");
+        buffer_free_null(buf);
+        return MOBI_DATA_CORRUPT;
+    }
+    /* IDXT holds one 16-bit offset per entry, so a record cannot describe more
+       entries than half of the bytes that follow idxt_offset; this also caps
+       the allocation below, which is sized by a value read from the file */
+    if (idxt_offset > indx_record->size ||
+        entries_count > (indx_record->size - idxt_offset) / 2) {
+        debug_print("IDXT section exceeds record, entries count: %zu\n", entries_count);
+        buffer_free_null(buf);
+        return MOBI_DATA_CORRUPT;
+    }
+    buf->offset = idxt_offset;
+    MOBIIdxt idxt;
+    /* heap instead of a variable length array: entries_count is untrusted input */
+    uint32_t *offsets = malloc((entries_count + 1) * sizeof(uint32_t));
+    if (offsets == NULL) {
+        debug_print("%s", "Memory allocation failed for IDXT offsets\n");
+        buffer_free_null(buf);
+        return MOBI_MALLOC_FAILED;
+    }
+    idxt.offsets = offsets;
+    ret = mobi_parse_idxt(buf, &idxt, entries_count);
+    if (ret != MOBI_SUCCESS) {
+        debug_print("%s", "IDXT parsing failed\n");
+        free(offsets);
+        buffer_free_null(buf);
+        return ret;
+    }
+    /* parse entries */
+    if (entries_count > 0) {
+        /* entries of this record are appended at index indx->entries_count,
+           the array is sized by total_entries_count */
+        if (indx->entries_count > indx->total_entries_count ||
+            entries_count > indx->total_entries_count - indx->entries_count) {
+            debug_print("Entries count exceeds total entries count: %zu\n", entries_count);
+            free(offsets);
+            buffer_free_null(buf);
+            return MOBI_DATA_CORRUPT;
+        }
+        if (indx->entries == NULL) {
+            /* zero-filled, so label/tags of entries not yet parsed are NULL */
+            indx->entries = calloc(indx->total_entries_count, sizeof(MOBIIndexEntry));
+            if (indx->entries == NULL) {
+                free(offsets);
+                buffer_free_null(buf);
+                return MOBI_MALLOC_FAILED;
+            }
+        }
+        size_t i = 0;
+        while (i < entries_count) {
+            ret = mobi_parse_index_entry(indx, idxt, *tagx, buf, i++);
+            if (ret != MOBI_SUCCESS) {
+                free(offsets);
+                buffer_free_null(buf);
+                return ret;
+            }
+        }
+        indx->entries_count += entries_count;
+
+    }
+    free(offsets);
+    buffer_free_null(buf);
+    return MOBI_SUCCESS;
+}
+
+/**
+ @brief Parser of a set of index records
+
+ @param[in] m MOBIData structure containing MOBI file metadata and data
+ @param[in,out] indx MOBIIndx structure to be filled with parsed entries
+ @param[in] indx_record_number Number of the first record of the set
+ @return MOBI_RET status code (on success MOBI_SUCCESS)
+ */
+MOBI_RET mobi_parse_index(const MOBIData *m, MOBIIndx *indx, const size_t indx_record_number) {
+    MOBI_RET ret;
+    /* tagx.tags array will be allocated in mobi_parse_tagx */
+    MOBITagx tagx = {.tags = NULL};
+    /* parse first meta INDX record */
+    MOBIPdbRecord
*record = mobi_get_record_by_seqnumber(m, indx_record_number);
+    ret = mobi_parse_indx(record, indx, &tagx);
+    if (ret != MOBI_SUCCESS) {
+        mobi_free_indx(indx);
+        free(tagx.tags);
+        indx = NULL;
+        return ret;
+    }
+    size_t cncx_count = indx->cncx_records_count;
+    /* parse remaining INDX records for the index */
+    size_t count = indx->entries_count;
+    indx->entries_count = 0;
+    while (count--) {
+        record = record->next;
+        ret = mobi_parse_indx(record, indx, &tagx);
+        if (ret != MOBI_SUCCESS) {
+            mobi_free_indx(indx);
+            free(tagx.tags);
+            indx = NULL;
+            return ret;
+        }
+    }
+    /* copy pointer to first cncx record if present and set info from first record */
+    if (cncx_count) {
+        indx->cncx_records_count = cncx_count;
+        indx->cncx_record = record->next;
+    }
+    free(tagx.tags);
+    return MOBI_SUCCESS;
+}
+
+/**
+ @brief Get a value of tag[tagid][tagindex] for given index entry
+
+ The first tag of the entry whose id equals tagid is used. If that tag holds
+ fewer than tagindex + 1 values, the value is reported as not found and
+ tagvalue is left untouched.
+
+ @param[in,out] tagvalue Will be set to a tag value
+ @param[in] entry Index entry to be searched for the value
+ @param[in] tag_arr Array: tag_arr[0] = tagid, tag_arr[1] = tagindex
+ @return MOBI_RET status code (on success MOBI_SUCCESS)
+ */
+MOBI_RET mobi_get_indxentry_tagvalue(uint32_t *tagvalue, const MOBIIndexEntry *entry, const unsigned tag_arr[]) {
+    if (entry == NULL) {
+        debug_print("%s", "INDX entry not initialized\n");
+        return MOBI_INIT_FAILED;
+    }
+    size_t i = 0;
+    while (i < entry->tags_count) {
+        if (entry->tags[i].tagid == tag_arr[0]) {
+            /* only the first tagvalues_count slots were filled by the parser */
+            if (tag_arr[1] < entry->tags[i].tagvalues_count) {
+                *tagvalue = entry->tags[i].tagvalues[tag_arr[1]];
+                return MOBI_SUCCESS;
+            }
+            break;
+        }
+        i++;
+    }
+    debug_print("tag[%u][%u] not found in entry: %s\n", tag_arr[0], tag_arr[1], entry->label);
+    return MOBI_DATA_CORRUPT;
+}
+
+
+/**
+ @brief Get compiled index entry string
+
+ Allocates memory for the string. Must be freed by caller.
+
+ @param[in] cncx_record MOBIPdbRecord structure with cncx record
+ @param[in] cncx_offset Offset of string entry from the beginning of the record
+ @return Entry string, NULL if the record is missing, the stored length does not fit the record or memory allocation fails
+ */
+char * mobi_get_cncx_string(const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset) {
+    if (cncx_record == NULL) {
+        debug_print("%s", "CNCX record not initialized\n");
+        return NULL;
+    }
+    /* TODO: handle multiple cncx records */
+    MOBIBuffer *buf = buffer_init_null(cncx_record->size);
+    if (buf == NULL) {
+        debug_print("%s", "Memory allocation failed for CNCX buffer\n");
+        return NULL;
+    }
+    buf->data = cncx_record->data;
+    buf->offset = cncx_offset;
+    size_t len = 0;
+    const uint32_t string_length = buffer_get_varlen(buf, &len);
+    char *string = NULL;
+    /* a string stored in the record cannot be longer than the record itself */
+    if (string_length <= cncx_record->size) {
+        /* size_t arithmetic, so that the terminator byte cannot wrap the length to zero */
+        string = malloc((size_t) string_length + 1);
+        if (string) {
+            buffer_getstring(string, buf, string_length);
+        }
+    } else {
+        debug_print("CNCX string length too long: %u\n", string_length);
+    }
+    /* the buffer wrapper is released on every path, not only on success */
+    buffer_free_null(buf);
+    return string;
+}
diff --git a/src/index.h b/src/index.h
new file mode 100644
index 0000000..5a3fa03
--- /dev/null
+++ b/src/index.h
@@ -0,0 +1,80 @@
+/** @file index.h
+ *
+ * Copyright (c) 2014 Bartek Fabiszewski
+ * http://www.fabiszewski.net
+ *
+ * This file is part of libmobi.
+ * Licensed under LGPL, either version 3, or any later.
+ * See
+ */
+
+#ifndef mobi_index_h
+#define mobi_index_h
+
+#include "config.h"
+#include "buffer.h"
+#include "mobi.h"
+
+/**
+ @defgroup index_tag Predefined tag arrays: {tagid, tagindex} for mobi_get_indxentry_tagvalue()
+ @{
+ */
+#define INDX_TAG_GUIDE_TITLE_CNCX (unsigned[]) {1, 0} /**< Guide title CNCX offset */
+
+#define INDX_TAG_NCX_FILEPOS (unsigned[]) {1, 0} /**< NCX filepos offset */
+#define INDX_TAG_NCX_TEXT_CNCX (unsigned[]) {3, 0} /**< NCX text CNCX offset */
+#define INDX_TAG_NCX_LEVEL (unsigned[]) {4, 0} /**< NCX level */
+#define INDX_TAG_NCX_KIND_CNCX (unsigned[]) {5, 0} /**< NCX kind CNCX offset */
+#define INDX_TAG_NCX_POSFID (unsigned[]) {6, 0} /**< NCX pos:fid */
+#define INDX_TAG_NCX_POSOFF (unsigned[]) {6, 1} /**< NCX pos:off */
+#define INDX_TAG_NCX_PARENT (unsigned[]) {21, 0} /**< NCX parent */
+#define INDX_TAG_NCX_CHILD_START (unsigned[]) {22, 0} /**< NCX start child */
+#define INDX_TAG_NCX_CHILD_END (unsigned[]) {23, 0} /**< NCX last child */
+
+#define
INDX_TAG_SKEL_COUNT (unsigned[]) {1, 0} /**< Skel fragments count */ +#define INDX_TAG_SKEL_POSITION (unsigned[]) {6, 0} /**< Skel position */ +#define INDX_TAG_SKEL_LENGTH (unsigned[]) {6, 1} /**< Skel length */ + +#define INDX_TAG_FRAG_AID_CNCX (unsigned[]) {2, 0} /**< Frag aid CNCX offset */ +#define INDX_TAG_FRAG_FILE_NR (unsigned[]) {3, 0} /**< Frag file number */ +#define INDX_TAG_FRAG_SEQUENCE_NR (unsigned[]) {4, 0} /**< Frag sequence number */ +#define INDX_TAG_FRAG_POSITION (unsigned[]) {6, 0} /**< Frag position */ +#define INDX_TAG_FRAG_LENGTH (unsigned[]) {6, 1} /**< Frag length */ +/** @} */ + +/** + @brief Tag entries in TAGX section (for internal INDX parsing) + */ +typedef struct { + uint8_t tag; /**< Tag */ + uint8_t values_count; /**< Number of values */ + uint8_t bitmask; /**< Bitmask */ + uint8_t control_byte; /**< EOF control byte */ +} TAGXTags; + +/** + @brief Parsed TAGX section (for internal INDX parsing) + + TAGX tags hold metadata of index entries. + It is present in the first index record. + */ +typedef struct { + TAGXTags *tags; /**< Array of tag entries */ + size_t tags_count; /**< Number of tag entries */ + size_t control_byte_count; /**< Number of control bytes */ +} MOBITagx; + +/** + @brief Parsed IDXT section (for internal INDX parsing) + + IDXT section holds offsets to index entries + */ +typedef struct { + uint32_t *offsets; /**< Offsets to index entries */ + size_t offsets_count; /**< Offsets count */ +} MOBIIdxt; + +MOBI_RET mobi_parse_indx(const MOBIPdbRecord *indx_record, MOBIIndx *indx, MOBITagx *tagx); +MOBI_RET mobi_get_indxentry_tagvalue(uint32_t *tagvalue, const MOBIIndexEntry *entry, const unsigned tag_arr[]); +char * mobi_get_cncx_string(const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset); +#endif diff --git a/src/memory.c b/src/memory.c index c872d4a..f9e286d 100644 --- a/src/memory.c +++ b/src/memory.c @@ -1,19 +1,33 @@ -// -// memory.c -// mobi -// -// Created by Bartek on 31.03.14. 
-// Copyright (c) 2014 Bartek. All rights reserved. -// - -#include +/** @file memory.c + * @brief Functions for initializing and releasing structures and data containers + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ + +#include #include "memory.h" +#include "debug.h" +#include "util.h" -MOBIData * mobi_init() { +/** + @brief Initializer for MOBIData structure + + It allocates memory for structure. + Memory should be freed with mobi_free(). + + @return MOBIData on success, NULL otherwise + */ +MOBIData * mobi_init(void) { MOBIData *m = NULL; m = calloc(1, sizeof(MOBIData)); if (m == NULL) return NULL; - m->use_kf8 = MOBI_USE_KF8; + m->use_kf8 = true; + m->kf8_boundary_offset = MOBI_NOTSET; m->ph = NULL; m->rh = NULL; m->mh = NULL; @@ -23,70 +37,87 @@ MOBIData * mobi_init() { return m; } -void mobi_free_mh(MOBIData *m) { - if (m->mh == NULL) { +/** + @brief Free MOBIMobiHeader structure + + @param[in] mh MOBIMobiHeader structure + */ +void mobi_free_mh(MOBIMobiHeader *mh) { + if (mh == NULL) { return; } - free(m->mh->header_length); - free(m->mh->mobi_type); - free(m->mh->text_encoding); - free(m->mh->uid); - free(m->mh->file_version); - free(m->mh->orth_index); - free(m->mh->infl_index); - free(m->mh->names_index); - free(m->mh->keys_index); - free(m->mh->extra0_index); - free(m->mh->extra1_index); - free(m->mh->extra2_index); - free(m->mh->extra3_index); - free(m->mh->extra4_index); - free(m->mh->extra5_index); - free(m->mh->non_text_index); - free(m->mh->full_name_offset); - free(m->mh->full_name_length); - free(m->mh->locale); - free(m->mh->input_lang); - free(m->mh->output_lang); - free(m->mh->min_version); - free(m->mh->image_index); - free(m->mh->huff_rec_index); - free(m->mh->huff_rec_count); - free(m->mh->huff_table_offset); - free(m->mh->huff_table_length); - free(m->mh->exth_flags); - free(m->mh->unknown6); - 
free(m->mh->drm_offset); - free(m->mh->drm_count); - free(m->mh->drm_size); - free(m->mh->drm_flags); - free(m->mh->first_text_index); - free(m->mh->last_text_index); - free(m->mh->unknown9); - free(m->mh->fcis_index); - free(m->mh->fcis_count); - free(m->mh->flis_index); - free(m->mh->flis_count); - free(m->mh->unknown10); - free(m->mh->unknown11); - free(m->mh->srcs_index); - free(m->mh->srcs_count); - free(m->mh->unknown12); - free(m->mh->unknown13); - free(m->mh->extra_flags); - free(m->mh->ncx_index); - free(m->mh->unknown14); - free(m->mh->unknown15); - free(m->mh->datp_index); - free(m->mh->unknown16); - free(m->mh->unknown17); - free(m->mh->unknown18); - free(m->mh->unknown19); - free(m->mh->unknown20); - free(m->mh); - m->mh = NULL; + free(mh->header_length); + free(mh->mobi_type); + free(mh->text_encoding); + free(mh->uid); + free(mh->version); + free(mh->orth_index); + free(mh->infl_index); + free(mh->names_index); + free(mh->keys_index); + free(mh->extra0_index); + free(mh->extra1_index); + free(mh->extra2_index); + free(mh->extra3_index); + free(mh->extra4_index); + free(mh->extra5_index); + free(mh->non_text_index); + free(mh->full_name_offset); + free(mh->full_name_length); + free(mh->locale); + free(mh->dict_input_lang); + free(mh->dict_output_lang); + free(mh->min_version); + free(mh->image_index); + free(mh->huff_rec_index); + free(mh->huff_rec_count); + free(mh->datp_rec_index); + free(mh->datp_rec_count); + free(mh->exth_flags); + free(mh->unknown6); + free(mh->drm_offset); + free(mh->drm_count); + free(mh->drm_size); + free(mh->drm_flags); + free(mh->fdst_index); + free(mh->first_text_index); + free(mh->last_text_index); + free(mh->fdst_section_count); + //free(mh->unknown9); + free(mh->fcis_index); + free(mh->fcis_count); + free(mh->flis_index); + free(mh->flis_count); + free(mh->unknown10); + free(mh->unknown11); + free(mh->srcs_index); + free(mh->srcs_count); + free(mh->unknown12); + free(mh->unknown13); + free(mh->extra_flags); + 
free(mh->ncx_index); + free(mh->fragment_index); + free(mh->skeleton_index); + free(mh->unknown14); + free(mh->unknown15); + free(mh->datp_index); + free(mh->guide_index); + free(mh->unknown16); + free(mh->unknown17); + free(mh->unknown18); + free(mh->unknown19); + free(mh->unknown20); + free(mh); + mh = NULL; } +/** + @brief Free all MOBIPdbRecord structures and its respective data attached to MOBIData structure + + Each MOBIPdbRecord structure holds metadata and data for each pdb record + + @param[in,out] m MOBIData structure + */ void mobi_free_rec(MOBIData *m) { MOBIPdbRecord *curr, *tmp; curr = m->rec; @@ -100,8 +131,15 @@ void mobi_free_rec(MOBIData *m) { m->rec = NULL; } +/** + @brief Free all MOBIExthHeader structures and its respective data attached to MOBIData structure + + Each MOBIExthHeader structure holds metadata and data for each EXTH record + + @param[in,out] m MOBIData structure + */ void mobi_free_eh(MOBIData *m) { - MOBIExtHeader *curr, *tmp; + MOBIExthHeader *curr, *tmp; curr = m->eh; while (curr != NULL) { tmp = curr; @@ -113,17 +151,22 @@ void mobi_free_eh(MOBIData *m) { m->eh = NULL; } +/** + @brief Free MOBIData structure and all its children + + @param[in] m MOBIData structure + */ void mobi_free(MOBIData *m) { if (m == NULL) { return; } - mobi_free_mh(m); + mobi_free_mh(m->mh); mobi_free_eh(m); mobi_free_rec(m); free(m->ph); free(m->rh); if (m->next) { - mobi_free_mh(m->next); + mobi_free_mh(m->next->mh); mobi_free_eh(m->next); free(m->next->rh); free(m->next); @@ -133,24 +176,216 @@ void mobi_free(MOBIData *m) { m = NULL; } -MOBIHuffCdic * mobi_init_huffcdic(MOBIData *m) { - MOBIHuffCdic *huffcdic; - int ret; - huffcdic = calloc(1, sizeof(MOBIHuffCdic)); +/** + @brief Initialize and return MOBIHuffCdic structure. + + MOBIHuffCdic structure holds parsed data from HUFF, CDIC records. + It is used for huffman decompression. + Initialized structure is a child of MOBIData structure. + It must be freed with mobi_free_huffcdic(). 
+ + @return MOBIHuffCdic on success, NULL otherwise + */ +MOBIHuffCdic * mobi_init_huffcdic(void) { + MOBIHuffCdic *huffcdic = calloc(1, sizeof(MOBIHuffCdic)); if (huffcdic == NULL) { - printf("Memory allocation for huffcdic structure failed\n"); - return NULL; - } - ret = mobi_parse_huffdic(m, huffcdic); - if (ret == MOBI_ERROR) { - free(huffcdic); + debug_print("%s", "Memory allocation for huffcdic structure failed\n"); return NULL; } return huffcdic; } +/** + @brief Free MOBIHuffCdic structure and all its children + + @param[in] huffcdic MOBIData structure + */ void mobi_free_huffcdic(MOBIHuffCdic *huffcdic) { + if (huffcdic == NULL) { + return; + } free(huffcdic->symbol_offsets); free(huffcdic->symbols); free(huffcdic); + huffcdic = NULL; +} + +/** + @brief Initialize and return MOBIRawml structure. + + MOBIRawml structure holds parsed text record metadata. + It is used in the process of parsing rawml text data. + It must be freed with mobi_free_rawml(). + + @param[in] m Initialized MOBIData structure + @return MOBIRawml on success, NULL otherwise + */ +MOBIRawml * mobi_init_rawml(const MOBIData *m) { + MOBIRawml *rawml = malloc(sizeof(MOBIRawml)); + if (rawml == NULL) { + debug_print("%s", "Memory allocation failed for rawml structure\n"); + return NULL; + } + rawml->version = mobi_get_fileversion(m); + rawml->fdst = NULL; + rawml->skel = NULL; + rawml->frag = NULL; + rawml->guide = NULL; + rawml->ncx = NULL; + rawml->flow = NULL; + rawml->markup = NULL; + rawml->resources = NULL; + return rawml; +} + +/** + @brief Free MOBIFdst structure and all its children + + @param[in] fdst MOBIFdst structure + */ +void mobi_free_fdst(MOBIFdst *fdst) { + if (fdst == NULL) { + return; + } + if (fdst->fdst_section_count > 0) { + free(fdst->fdst_section_starts); + free(fdst->fdst_section_ends); + } + free(fdst); + fdst = NULL; +} + +/** + @brief Initialize and return MOBIIndx structure. + + MOBIIndx structure holds INDX index record entries. 
+ Must be freed with mobi_free_indx() + + @return MOBIIndx on success, NULL otherwise + */ +MOBIIndx * mobi_init_indx(void) { + MOBIIndx *indx = calloc(1, sizeof(MOBIIndx)); + if (indx == NULL) { + debug_print("%s", "Memory allocation failed for indx structure\n"); + return NULL; + } + indx->entries = NULL; + indx->cncx_record = NULL; + return indx; +} + +/** + @brief Free index entries data and all its children + + @param[in] indx MOBIIndx structure that holds indx->entries + */ +void mobi_free_index_entries(MOBIIndx *indx) { + if (indx == NULL || indx->entries == NULL) { + return; + } + size_t i = 0; + while (i < indx->entries_count) { + free(indx->entries[i].label); + if (indx->entries[i].tags != NULL) { + free(indx->entries[i].tags); + } + i++; + } + free(indx->entries); + indx->entries = NULL; } + +/** + @brief Free MOBIIndx structure and all its children + + @param[in] indx MOBIIndx structure that holds indx->entries + */ +void mobi_free_indx(MOBIIndx *indx) { + if (indx == NULL) { + return; + } + mobi_free_index_entries(indx); + free(indx); + indx = NULL; +} + +/** + @brief Free MOBIPart structure + + Pointer to data may point to memory area also used by record->data. 
+ So we need a flag to leave the memory allocated, while freeing MOBIPart structure + + @param[in] part MOBIPart structure + @param[in] free_data Flag, if set - a pointer to part->data is also released, otherwise not released + */ +void mobi_free_part(MOBIPart *part, int free_data) { + MOBIPart *curr, *tmp; + curr = part; + while (curr != NULL) { + tmp = curr; + curr = curr->next; + if (free_data) { free(tmp->data); } + free(tmp); + tmp = NULL; + } + part = NULL; +} + +/** + @brief Free MOBIPart structure for opf and ncx data + + @param[in] part MOBIPart structure + */ +void mobi_free_opf_data(MOBIPart *part) { + while (part != NULL) { + if (part->type == T_NCX || part->type == T_OPF) { + free(part->data); + } + part = part->next; + } +} + +/** + @brief Free MOBIPart structure for decoded font data + + @param[in] part MOBIPart structure + */ +void mobi_free_font_data(MOBIPart *part) { + while (part != NULL) { + if (part->type == T_OTF || part->type == T_TTF) { + free(part->data); + } + part = part->next; + } +} + +/** + @brief Free MOBIRawml structure allocated by mobi_init_rawml() + + Pointer to data may point to memory area also used by record->data. 
+ So we need a flag to leave the memory allocated, while freeing MOBIPart structure + + @param[in] rawml MOBIRawml structure + */ +void mobi_free_rawml(MOBIRawml *rawml) { + if (rawml == NULL) { + return; + } + mobi_free_fdst(rawml->fdst); + mobi_free_indx(rawml->skel); + mobi_free_indx(rawml->frag); + mobi_free_indx(rawml->guide); + mobi_free_indx(rawml->ncx); + mobi_free_part(rawml->flow, true); + mobi_free_part(rawml->markup,true); + /* do not free resources data, these are links to records data */ + /* only free opf and ncx data */ + mobi_free_opf_data(rawml->resources); + /* and free decoded fonts data */ + mobi_free_font_data(rawml->resources); + mobi_free_part(rawml->resources, false); + free(rawml); + rawml = NULL; +} + + diff --git a/src/memory.h b/src/memory.h index 6477bab..5db9b1d 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,23 +1,30 @@ -// -// memory.h -// mobi -// -// Created by Bartek on 31.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file memory.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ -#ifndef mobi_memory_h -#define mobi_memory_h +#ifndef libmobi_memory_h +#define libmobi_memory_h +#include "config.h" #include "mobi.h" -MOBIData * mobi_init(); -void mobi_free_mh(MOBIData *m); +MOBIData * mobi_init(void); +void mobi_free_mh(MOBIMobiHeader *mh); void mobi_free_rec(MOBIData *m); void mobi_free_eh(MOBIData *m); void mobi_free(MOBIData *m); -MOBIHuffCdic * mobi_init_huffcdic(MOBIData *m); +MOBIHuffCdic * mobi_init_huffcdic(void); void mobi_free_huffcdic(MOBIHuffCdic *huffcdic); +MOBIIndx * mobi_init_indx(void); +void mobi_free_indx(MOBIIndx *indx); +void mobi_free_index_entries(MOBIIndx *indx); + #endif diff --git a/src/miniz.c b/src/miniz.c new file mode 100644 index 0000000..9bfc8cc --- /dev/null +++ b/src/miniz.c @@ -0,0 +1,4919 @@ +/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Change History + 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major release with Zip64 support (almost there!): + - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com) which could cause locate files to not find files. This bug + would only have occured in earlier versions if you explicitly used this flag, OR if you used mz_zip_extract_archive_file_to_heap() or mz_zip_add_mem_to_archive_file_in_place() + (which used this flag). If you can't switch to v1.15 but want to fix this bug, just remove the uses of this flag from both helper funcs (and of course don't use the flag). 
+ - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when pUser_read_buf is not NULL and compressed size is > uncompressed size + - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract compressed data from directory entries, to account for weird zipfiles which contain zero-size compressed data on dir entries. + Hopefully this fix won't cause any issues on weird zip archives, because it assumes the low 16-bits of zip external attributes are DOS attributes (which I believe they always are in practice). + - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the internal attributes, just the filename and external attributes + - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed + - Added cmake support for Linux builds which builds all the examples, tested with clang v3.3 and gcc v4.6. + - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti + - Merged MZ_FORCEINLINE fix from hdeanclark + - Fix include before config #ifdef, thanks emil.brink + - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping (super useful for OpenGL apps), and explicit control over the compression level (so you can + set it to 1 for real-time compression). + - Merged in some compiler fixes from paulharris's github repro. + - Retested this build under Windows (VS 2010, including static analysis), tcc 0.9.26, gcc v4.6 and clang v3.3. + - Added example6.c, which dumps an image of the mandelbrot set to a PNG file. + - Modified example2 to help test the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. 
+ - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix possible src file fclose() leak if alignment bytes+local header file write failed + - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): Was pushing the wrong central dir header offset, appears harmless in this release, but it became a problem in the zip64 branch + 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include <time.h> (thanks fermtect). + 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. + - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. + - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. + - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly + "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). + - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. + - Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. + - Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. + - Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) + - Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). + 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's. + level_and_flags can now be set to MZ_DEFAULT_COMPRESSION.
Thanks to Bruce Dawson for the feedback/bug report. + 5/28/11 v1.11 - Added statement from unlicense.org + 5/27/11 v1.10 - Substantial compressor optimizations: + - Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a + - Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86). + - Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types. + - Refactored the compression code for better readability and maintainability. + - Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large + drop in throughput on some files). + 5/15/11 v1.09 - Initial stable release. + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. 
+ + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast.
+ + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which compiles the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done.
This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). + */ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be called. 
+// The current downside is the times written to your archives will be from 1979. +//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib names, to prevent conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux +#define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +#include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
+#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + // ------------------- zlib-style API Definitions. + + // For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! + typedef unsigned long mz_ulong; + + // mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. + void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) + // mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. + mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) + // mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. + mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + + // Compression strategies. + enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + + // Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + + // Heap allocation callbacks. + // Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. 
+ typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); + typedef void (*mz_free_func)(void *opaque, void *address); + typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + + // Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). + enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + + // Return status codes. MZ_PARAM_ERROR is non-standard. + enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; + + // Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. + enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + + // Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + + struct mz_internal_state; + + // Compression/decompression stream struct. 
+ typedef struct mz_stream_s + { + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used + } mz_stream; + + typedef mz_stream *mz_streamp; + + // Returns the version string of miniz.c. + const char *mz_version(void); + + // mz_deflateInit() initializes a compressor with default options: + // Parameters: + // pStream must point to an initialized mz_stream struct. + // level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. + // level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. + // (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) + // Return values: + // MZ_OK on success. + // MZ_STREAM_ERROR if the stream is bogus. + // MZ_PARAM_ERROR if the input parameters are bogus. + // MZ_MEM_ERROR on out of memory. 
+ int mz_deflateInit(mz_streamp pStream, int level); + + // mz_deflateInit2() is like mz_deflateInit(), except with more control: + // Additional parameters: + // method must be MZ_DEFLATED + // window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) + // mem_level must be between [1, 9] (it's checked but ignored by miniz.c) + int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + + // Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). + int mz_deflateReset(mz_streamp pStream); + + // mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. + // Parameters: + // pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. + // flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. + // Return values: + // MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). + // MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. + // MZ_STREAM_ERROR if the stream is bogus. + // MZ_PARAM_ERROR if one of the parameters is invalid. + // MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) + int mz_deflate(mz_streamp pStream, int flush); + + // mz_deflateEnd() deinitializes a compressor: + // Return values: + // MZ_OK on success. + // MZ_STREAM_ERROR if the stream is bogus.
+ int mz_deflateEnd(mz_streamp pStream); + + // mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. + mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + + // Single-call compression functions mz_compress() and mz_compress2(): + // Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. + int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + + // mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). + mz_ulong mz_compressBound(mz_ulong source_len); + + // Initializes a decompressor. + int mz_inflateInit(mz_streamp pStream); + + // mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: + // window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). + int mz_inflateInit2(mz_streamp pStream, int window_bits); + + // Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. + // Parameters: + // pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. + // flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. + // On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). 
+ // MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. + // Return values: + // MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. + // MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. + // MZ_STREAM_ERROR if the stream is bogus. + // MZ_DATA_ERROR if the deflate stream is invalid. + // MZ_PARAM_ERROR if one of the parameters is invalid. + // MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again + // with more input data, or with more room in the output buffer (except when using single call decompression, described above). + int mz_inflate(mz_streamp pStream, int flush); + + // Deinitializes a decompressor. + int mz_inflateEnd(mz_streamp pStream); + + // Single-call decompression. + // Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. + int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + + // Returns a string description of the specified error code, or NULL if the error code is invalid. + const char *mz_error(int err); + + // Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. + // Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. 
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; +#define Z_NULL 0 +#define Z_NO_FLUSH MZ_NO_FLUSH +#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH +#define Z_SYNC_FLUSH MZ_SYNC_FLUSH +#define Z_FULL_FLUSH MZ_FULL_FLUSH +#define Z_FINISH MZ_FINISH +#define Z_BLOCK MZ_BLOCK +#define Z_OK MZ_OK +#define Z_STREAM_END MZ_STREAM_END +#define Z_NEED_DICT MZ_NEED_DICT +#define Z_ERRNO MZ_ERRNO +#define Z_STREAM_ERROR MZ_STREAM_ERROR +#define Z_DATA_ERROR MZ_DATA_ERROR +#define Z_MEM_ERROR MZ_MEM_ERROR +#define Z_BUF_ERROR MZ_BUF_ERROR +#define Z_VERSION_ERROR MZ_VERSION_ERROR +#define Z_PARAM_ERROR MZ_PARAM_ERROR +#define Z_NO_COMPRESSION MZ_NO_COMPRESSION +#define Z_BEST_SPEED MZ_BEST_SPEED +#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION +#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION +#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY +#define Z_FILTERED MZ_FILTERED +#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY +#define Z_RLE MZ_RLE +#define Z_FIXED MZ_FIXED +#define Z_DEFLATED MZ_DEFLATED +#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS +#define alloc_func mz_alloc_func +#define free_func mz_free_func +#define internal_state mz_internal_state +#define z_stream mz_stream +#define deflateInit mz_deflateInit +#define deflateInit2 mz_deflateInit2 +#define deflateReset mz_deflateReset +#define deflate mz_deflate +#define deflateEnd mz_deflateEnd +#define deflateBound mz_deflateBound +#define compress mz_compress +#define compress2 mz_compress2 +#define compressBound mz_compressBound +#define inflateInit mz_inflateInit +#define inflateInit2 mz_inflateInit2 +#define inflate mz_inflate +#define inflateEnd mz_inflateEnd +#define uncompress mz_uncompress +#define crc32 mz_crc32 +#define adler32 mz_adler32 +#define MAX_WBITS 15 
+#define MAX_MEM_LEVEL 9 +#define zError mz_error +#define ZLIB_VERSION MZ_VERSION +#define ZLIB_VERNUM MZ_VERNUM +#define ZLIB_VER_MAJOR MZ_VER_MAJOR +#define ZLIB_VER_MINOR MZ_VER_MINOR +#define ZLIB_VER_REVISION MZ_VER_REVISION +#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION +#define zlibVersion mz_version +#define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + + // ------------------- Types and macros + + typedef unsigned char mz_uint8; + typedef signed short mz_int16; + typedef unsigned short mz_uint16; + typedef unsigned int mz_uint32; + typedef unsigned int mz_uint; + typedef long long mz_int64; + typedef unsigned long long mz_uint64; + typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + + // An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER +#define MZ_MACRO_END while (0, 0) +#else +#define MZ_MACRO_END while (0) +#endif + + // ------------------- ZIP archive reading/writing + +#ifndef MINIZ_NO_ARCHIVE_APIS + + enum + { + MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 + }; + + typedef struct + { + mz_uint32 m_file_index; + mz_uint32 m_central_dir_ofs; + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; +#ifndef MINIZ_NO_TIME + time_t m_time; +#endif + mz_uint32 m_crc32; + mz_uint64 m_comp_size; + mz_uint64 m_uncomp_size; + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + mz_uint64 m_local_header_ofs; + mz_uint32 m_comment_size; + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; + } mz_zip_archive_file_stat; + + typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); + typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); + + struct 
mz_zip_internal_state_tag; + typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + + typedef enum + { + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 + } mz_zip_mode; + + typedef struct mz_zip_archive_tag + { + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + mz_uint m_total_files; + mz_zip_mode m_zip_mode; + + mz_uint m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + + } mz_zip_archive; + + typedef enum + { + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 + } mz_zip_flags; + + // ZIP archive reading + + // Inits a ZIP archive reader. + // These functions read and validate the archive's central directory. + mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags); + mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags); + +#ifndef MINIZ_NO_STDIO + mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); +#endif + + // Returns the total number of files in the archive. + mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + + // Returns detailed information about an archive file entry. + mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); + + // Determines if an archive file entry is a directory entry. + mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); + mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); + + // Retrieves the filename of an archive file entry. 
+ // Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. + mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); + + // Attempts to locates a file in the archive's central directory. + // Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH + // Returns -1 if the file cannot be found. + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + // Extracts a archive file to a memory buffer using no memory allocation. + mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + + // Extracts a archive file to a memory buffer. + mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); + mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); + + // Extracts a archive file to a dynamically allocated heap buffer. + void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); + void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); + + // Extracts a archive file using a callback function to output the file's data. 
+ mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); + mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); + +#ifndef MINIZ_NO_STDIO + // Extracts a archive file to a disk file and sets its last accessed and modified times. + // This function only extracts files, not archive directory records. + mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); + mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); +#endif + + // Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. + mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + + // ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + + // Inits a ZIP archive writer. + mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); + mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); + +#ifndef MINIZ_NO_STDIO + mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); +#endif + + // Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. + // For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. + // For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). 
+ // Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. + // Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before + // the archive is finalized the file's central directory will be hosed. + mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); + + // Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. + // To add a directory entry, call this method with an archive name ending in a forwardslash with empty buffer. + // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. + mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); + mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); + +#ifndef MINIZ_NO_STDIO + // Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. + // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. + mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +#endif + + // Adds a file to an archive by fully cloning the data from another archive. + // This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields. 
+ mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index); + + // Finalizes the archive by writing the central directory records followed by the end of central directory record. + // After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). + // An archive must be manually finalized by calling this function for it to be valid. + mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); + mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize); + + // Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. + // Note for the archive to be valid, it must have been finalized before ending. + mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + + // Misc. high-level helper functions: + + // mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. + // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + // Reads a single file from an archive into a heap block. + // Returns NULL on failure. + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags); + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + + // ------------------- Low-level Decompression API Definitions + + // Decompression flags used by tinfl_decompress(). + // TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). 
Otherwise, the input is a raw deflate stream. + // TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. + // TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). + // TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. + enum + { + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 + }; + + // High level decompression functions: + // tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). + // On entry: + // pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. + // On return: + // Function returns a pointer to the decompressed data, or NULL on failure. + // *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. + // The caller must call mz_free() on the returned block when it's no longer needed. + void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + + // tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. + // Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) + size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + + // tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. + // Returns 1 on success or 0 on failure. 
+ typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + + struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + + // Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + + // Return status. + typedef enum + { + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 + } tinfl_status; + + // Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + + // Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. + // This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. + tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + + // Internal/private bits follow. 
+ enum + { + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS + }; + + typedef struct + { + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; + } tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS +#define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (32) +#endif + + struct tinfl_decompressor_tag + { + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; + }; + + // ------------------- Low-level Compression API Definitions + + // Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + + // tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): + // TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). + enum + { + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF + }; + + // TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. 
+ // TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). + // TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. + // TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). + // TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) + // TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. + // TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. + // TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. + // The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). + enum + { + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 + }; + + // High level compression functions: + // tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). + // On entry: + // pSrc_buf, src_buf_len: Pointer and size of source block to compress. + // flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. + // On return: + // Function returns a pointer to the compressed data, or NULL on failure. + // *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. + // The caller must free() the returned block when it's no longer needed. 
+ void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + + // tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. + // Returns 0 on failure. + size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + + // Compresses an image to a compressed PNG file in memory. + // On entry: + // pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. + // The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. + // level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL + // If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). + // On return: + // Function returns a pointer to the compressed data, or NULL on failure. + // *pLen_out will be set to the size of the PNG image file. + // The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. + void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); + void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + + // Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. + typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + + // tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. 
+ mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + + enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + + // TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY + enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else + enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + + // The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. + typedef enum + { + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, + } tdefl_status; + + // Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums + typedef enum + { + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 + } tdefl_flush; + + // tdefl's compression state structure. 
+ typedef struct + { + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; + } tdefl_compressor; + + // Initializes the compressor. + // There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. + // pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. + // If pPut_buf_func is NULL the user should always call the tdefl_compress() API. + // flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
+ tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + + // Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. + tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + + // tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. + // tdefl_compress_buffer() always consumes the entire input buffer. + tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + + tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); + mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + + // Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS is defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS + // Create tdefl_compress() flags given zlib-style compression parameters. + // level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) + // window_bits may be -15 (raw deflate) or 15 (zlib) + // strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED + mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ?
1 : -1]; + +#include +#include + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC +#define MZ_MALLOC(x) NULL +#define MZ_FREE(x) (void)x, ((void)0) +#define MZ_REALLOC(p, x) NULL +#else +#define MZ_MALLOC(x) malloc(x) +#define MZ_FREE(x) free(x) +#define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) +#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else +#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) +#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER +#define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else +#define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + // ------------------- zlib-style API's + + mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) + { + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; + } + + // Karl Malbrain's compact CRC-32. 
See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ + mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) + { + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; + } + + void mz_free(void *p) + { + MZ_FREE(p); + } + +#ifndef MINIZ_NO_ZLIB_APIS + + static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } + static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } + static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + + const char *mz_version(void) + { + return MZ_VERSION; + } + + int mz_deflateInit(mz_streamp pStream, int level) + { + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); + } + + int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) + { + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + 
pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; + } + + int mz_deflateReset(mz_streamp pStream) + { + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; + } + + int mz_deflate(mz_streamp pStream, int flush) + { + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; + } + + int mz_deflateEnd(mz_streamp pStream) + { + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; + } + + mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) + { + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) + return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); + } + + int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) + { + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). 
+ if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); + } + + int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) + { + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); + } + + mz_ulong mz_compressBound(mz_ulong source_len) + { + return mz_deflateBound(NULL, source_len); + } + + typedef struct + { + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; + } inflate_state; + + int mz_inflateInit2(mz_streamp pStream, int window_bits) + { + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = 
window_bits; + + return MZ_OK; + } + + int mz_inflateInit(mz_streamp pStream) + { + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); + } + + int mz_inflate(mz_streamp pStream, int flush) + { + inflate_state* pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
+ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large enough to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ?
MZ_STREAM_END : MZ_OK; + } + + int mz_inflateEnd(mz_streamp pStream) + { + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; + } + + int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) + { + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); + } + + const char *mz_error(int err) + { + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; + } + +#endif //MINIZ_NO_ZLIB_APIS + + // ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do 
{ status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + + // TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never + // reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) do { \ +if (pIn_buf_cur >= pIn_buf_end) { \ +for ( ; ; ) { \ +if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ +TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ +if (pIn_buf_cur < pIn_buf_end) { \ +c = *pIn_buf_cur++; \ +break; \ +} \ +} else { \ +c = 0; \ +break; \ +} \ +} \ +} else c = *pIn_buf_cur++; } MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END + + // TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. + // It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a + // Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the + // bit buffer contains >=15 bits (deflate's max. Huffman code size). 
+    // The macro expects `temp`, `code_len` and `c` to be declared at the expansion site (TINFL_HUFF_DECODE does so).
+    // Every `break` below leaves the outer do/while as soon as a complete code is known to be in the bit buffer.
+#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \
+do { \
+temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; /* probe the fast table with the low bits */ \
+if (temp >= 0) { \
+code_len = temp >> 9; /* fast entries keep the code length in bits 9 and up */ \
+if ((code_len) && (num_bits >= code_len)) \
+break; \
+} else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \
+code_len = TINFL_FAST_LOOKUP_BITS; \
+do { /* negative entry: follow m_tree one input bit at a time */ \
+temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
+} while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \
+} TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \
+} while (num_bits < 15);
+
+    // TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
+    // beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
+    // decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
+    // The slow path is only executed at the very end of the input buffer.
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ +int temp; mz_uint code_len, c; \ +if (num_bits < 15) { \ +if ((pIn_buf_end - pIn_buf_cur) < 2) { \ +TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ +} else { \ +bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ +} \ +} \ +if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ +code_len = temp >> 9, temp &= 511; \ +else { \ +code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ +} sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + + tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) + { + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? 
(size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) + { + size_t n; while (pOut_buf_cur >= 
pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if ((65536 != total) && (used_syms > 1)) + { + 
TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } 
+ pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if 
(decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + + common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; + } + + // Higher level helper functions. 
+    // Decompresses src_buf_len bytes at pSrc_buf into a buffer grown with MZ_REALLOC (128 bytes first, doubling afterwards).
+    // On success returns the buffer and stores the decompressed size in *pOut_len; the caller owns the buffer.
+    // On any failure (decoder error, truncated input, allocation failure, capacity overflow) frees the buffer,
+    // sets *pOut_len to 0 and returns NULL.
+    void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
+    {
+        tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0;
+        *pOut_len = 0;
+        tinfl_init(&decomp);
+        for ( ; ; )
+        {
+            size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
+            tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size,
+                (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
+            // All input is supplied up front, so a request for more input means the stream is truncated.
+            if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT))
+            {
+                MZ_FREE(pBuf); *pOut_len = 0; return NULL;
+            }
+            src_buf_ofs += src_buf_size;
+            *pOut_len += dst_buf_size;
+            if (status == TINFL_STATUS_DONE) break;
+            // Doubling must not wrap: a wrapped capacity would shrink the buffer below the bytes already written.
+            if (out_buf_capacity > ((size_t)-1) / 2)
+            {
+                MZ_FREE(pBuf); *pOut_len = 0; return NULL;
+            }
+            new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128;
+            pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
+            if (!pNew_buf)
+            {
+                MZ_FREE(pBuf); *pOut_len = 0; return NULL;
+            }
+            pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity;
+        }
+        return pBuf;
+    }
+
+    // Decompresses src_buf_len bytes at pSrc_buf into the caller's buffer of out_buf_len bytes in a single call.
+    // Returns the number of bytes written, or TINFL_DECOMPRESS_MEM_TO_MEM_FAILED unless the decoder reports TINFL_STATUS_DONE.
+    size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
+    {
+        tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp);
+        status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
+        return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len;
+    }
+
+    // Decompresses *pIn_buf_size bytes at pIn_buf through an internal TINFL_LZ_DICT_SIZE-byte wrapping dictionary and
+    // passes every produced chunk to pPut_buf_func(chunk, length, pPut_buf_user).
+    // Returns 1 when the stream decoded completely, 0 otherwise (decoder error, callback returned 0, or the
+    // dictionary could not be allocated). *pIn_buf_size receives the number of input bytes consumed.
+    int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
+    {
+        int result = 0;
+        tinfl_decompressor decomp;
+        mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0;
+        if (!pDict)
+        {
+            // Report failure as 0 like every other error path; TINFL_STATUS_FAILED is non-zero and would read as success.
+            *pIn_buf_size = 0;
+            return 0;
+        }
+        tinfl_init(&decomp);
+        for ( ; ; )
+        {
+            size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
+            tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
+                (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
+            in_buf_ofs += in_buf_size;
+            if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
+                break;
+            if (status != TINFL_STATUS_HAS_MORE_OUTPUT)
+            {
+                result = (status == TINFL_STATUS_DONE);
+                break;
+            }
+            // Advance the write position, wrapping around the dictionary.
+            dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
+        }
+        MZ_FREE(pDict);
+        *pIn_buf_size = in_buf_ofs;
+        return result;
+    }
+
+    // ------------------- Low-level Compression (independent from all decompression API's)
+
+    // Purposely making these tables static for faster init and thread safety.
+    // Indexed by the match-length byte stored in the LZ code buffer; yields the literal/length Huffman symbol
+    // (257..285) that tdefl_compress_lz_codes() looks up in m_huff_codes[0] / m_huff_code_sizes[0].
+    static const mz_uint16 s_tdefl_len_sym[256] = {
+        257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
+        273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
+        277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
+        279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
+        281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
+        282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
+        283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
+        284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };
+
+    // Same index as s_tdefl_len_sym; yields how many extra bits of the match length are emitted after the symbol.
+    static const mz_uint8 s_tdefl_len_extra[256] = {
+        0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+        4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };
+
+    // Distance symbol for stored match distances below 512, indexed by (match_dist & 511).
+    static const mz_uint8 s_tdefl_small_dist_sym[512] = {
+        0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
+        11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
+        13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
+        14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
+        14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+        15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
+        16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+        16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+        16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+        17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+        17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+        17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };
+
+    // Number of extra distance bits for stored match distances below 512, indexed by (match_dist & 511).
+    static const mz_uint8 s_tdefl_small_dist_extra[512] = {
+        0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
+        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7 };
+
+    // Distance symbol for stored match distances of 512 and above, indexed by (match_dist >> 8).
+    static const mz_uint8 s_tdefl_large_dist_sym[128] = {
+        0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
+        26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
+        28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };
+
+    // Number of extra distance bits for stored match distances of 512 and above, indexed by (match_dist >> 8).
+    static const mz_uint8 s_tdefl_large_dist_extra[128] = {
+        0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
+        12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+        13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };
+
+    // Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
+    // One symbol of a Huffman alphabet: m_key is the 16-bit sort key, m_sym_index the symbol it belongs to.
+    typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq;
+
+    // Sorts the first num_syms entries of pSyms0 by ascending m_key with a stable byte-wise radix sort, using
+    // pSyms1 (same capacity) as the second buffer. The high-byte pass is skipped when every key fits in 8 bits.
+    // Returns whichever of the two buffers holds the sorted result.
+    static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1)
+    {
+        mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist);
+        // hist[0..255] counts low bytes, hist[256..511] counts high bytes.
+        for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; }
+        // Drop the high-byte pass when all keys have a zero high byte.
+        while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
+        for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
+        {
+            const mz_uint32* pHist = &hist[pass << 8];
+            mz_uint offsets[256], cur_ofs = 0;
+            for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
+            for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
+            { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; }
+        }
+        return pCur_syms;
+    }
+
+    // tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
+    //
+    // In-place computation of Huffman code lengths. On entry A[0..n-1].m_key holds the symbol frequencies in
+    // non-decreasing order (the output of tdefl_radix_sort_syms()); on return each m_key holds that entry's code
+    // length. m_sym_index is left untouched. n == 0 is a no-op and a single symbol gets length 1.
+    static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
+    {
+        int root, leaf, next, avbl, used, dpth;
+        if (n==0) return; else if (n==1) { A[0].m_key = 1; return; }
+
+        // Pass 1: merge the two cheapest nodes repeatedly. `leaf` walks the unused leaves, `root` the internal
+        // nodes already built; a consumed internal node stores the index of its parent in m_key.
+        A[0].m_key += A[1].m_key; root = 0; leaf = 2;
+        for (next=1; next < n-1; next++)
+        {
+            if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = (mz_uint16)next; } else A[next].m_key = A[leaf++].m_key;
+            if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); A[root++].m_key = (mz_uint16)next; } else A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
+        }
+
+        // Pass 2: turn parent indices into depths of the internal nodes, starting from the tree root at A[n-2].
+        A[n-2].m_key = 0; for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
+
+        // Pass 3: walk the depths level by level and hand the slots not used by internal nodes to the leaves.
+        avbl = 1; used = dpth = 0; root = n-2; next = n-1;
+        while (avbl>0)
+        {
+            while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; }
+            while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; }
+            avbl = 2*used; dpth++; used = 0;
+        }
+    }
+
+    // Limits canonical Huffman code table's max code size.
+ enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; + static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) + { + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } + } + + static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) + { + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= 
code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } + } + +#define TDEFL_PUT_BITS(b, l) do { \ +mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ +d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ +while (d->m_bits_in >= 8) { \ +if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ +*d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ +d->m_bit_buffer >>= 8; \ +d->m_bits_in -= 8; \ +} \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ +if (rle_repeat_count < 3) { \ +d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ +while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ +} else { \ +d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ +if (rle_z_count < 3) { \ +d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ +} else if (rle_z_count <= 10) { \ +d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ +} else { \ +d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} 
rle_z_count = 0; } } + + static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + + static void tdefl_start_dynamic_block(tdefl_compressor *d) + { + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) 
{ TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } + } + + static void tdefl_start_static_block(tdefl_compressor *d) + { + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); + } + + static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) + { + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = 
d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); + } +#else + static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) + { + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); + } +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + + static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) + { + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); + } + + static int tdefl_flush_block(tdefl_compressor *d, int flush) + { + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
+ else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; + } + +// Ignore strict-aliasing warning on MinGW gcc +// Added by libmobi author +#pragma GCC diagnostic ignored 
"-Wstrict-aliasing" +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) + static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) + { + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; +#define TDEFL_PROBE \ +next_probe_pos = d->m_next[probe_pos]; \ +if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ +probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ +if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) 
break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } + } +#else + static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) + { + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; +#define TDEFL_PROBE \ +next_probe_pos = d->m_next[probe_pos]; \ +if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ +probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ +if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } + } +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + static mz_bool tdefl_compress_fast(tdefl_compressor *d) + { + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. 
+ mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 
*)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; + } +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + + static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) + { + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; + } + + static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) + { + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? 
s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; + } + + static mz_bool tdefl_compress_normal(tdefl_compressor *d) + { + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = 
d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + 
tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; + } + + static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) + { + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; + } + + tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) + { + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? 
*pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + } + + tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) + { + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, 
&in_buf_size, NULL, NULL, flush); + } + + tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) + { + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; + } + + tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) + { + return d->m_prev_return_status; + } + + mz_uint32 tdefl_get_adler32(tdefl_compressor *d) + { + return d->m_adler32; + } + + mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) + { + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = 
(tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; + } + + typedef struct + { + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; + } tdefl_output_buffer; + + static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) + { + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; + } + + void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) + { + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = out_buf.m_size; return out_buf.m_pBuf; + } + + size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) + { + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; + } + +#ifndef MINIZ_NO_ZLIB_APIS + 
static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + + // level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). + mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) + { + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; + } +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + + // Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at + // http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. + // This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. + void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) + { + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. 
+ static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, 
*pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; + } + void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) + { + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); + } + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + + // ------------------- .ZIP archive reading + +#ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * +#else +#include +#include + +#if defined(_MSC_VER) || defined(__MINGW64__) + static FILE *mz_fopen(const char *pFilename, const char *pMode) + { + FILE* pFile = NULL; + fopen_s(&pFile, pFilename, pMode); + return pFile; + } + static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) + { + FILE* pFile = NULL; + if (freopen_s(&pFile, pPath, pMode, pStream)) + return NULL; + return pFile; + } +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN mz_fopen +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 _ftelli64 +#define MZ_FSEEK64 _fseeki64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN mz_freopen +#define MZ_DELETE_FILE remove +#elif defined(__MINGW32__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) 
+#define MZ_DELETE_FILE remove +#elif defined(__TINYC__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftell +#define MZ_FSEEK64 fseek +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__GNUC__) && _LARGEFILE64_SOURCE +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen64(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT stat64 +#define MZ_FILE_STAT stat64 +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) +#define MZ_DELETE_FILE remove +#else +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello +#define MZ_FSEEK64 fseeko +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#endif // #ifdef _MSC_VER +#endif // #ifdef MINIZ_NO_STDIO + +#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) + + // Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. 
+ enum + { + // ZIP archive identifiers and record sizes + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, + // Central directory header record offsets + MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8, + MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16, + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, + MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, + // Local directory header offsets + MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10, + MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, + MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, + // End of central directory offsets + MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, + }; + + typedef struct + { + void *m_p; + size_t m_size, m_capacity; + mz_uint m_element_size; + } mz_zip_array; + + struct mz_zip_internal_state_tag + { + mz_zip_array m_central_dir; + mz_zip_array m_central_dir_offsets; + mz_zip_array m_sorted_central_dir_offsets; + MZ_FILE *m_pFile; + void *m_pMem; + size_t m_mem_size; + size_t m_mem_capacity; + }; + +#define 
MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] + + static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); + memset(pArray, 0, sizeof(mz_zip_array)); + } + + static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) + { + void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; + if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } + if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE; + pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; + return MZ_TRUE; + } + + static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing) + { + if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return MZ_FALSE; } + return MZ_TRUE; + } + + static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing) + { + if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; } + pArray->m_size = new_size; + return MZ_TRUE; + } + + static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n) + { + return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); + } + + static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n) + { + size_t orig_size = 
pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE; + memcpy((mz_uint8*)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); + return MZ_TRUE; + } + +#ifndef MINIZ_NO_TIME + static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) + { + struct tm tm; + memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1; + tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31; + tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62; + return mktime(&tm); + } + + static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) + { +#ifdef _MSC_VER + struct tm tm_struct; + struct tm *tm = &tm_struct; + errno_t err = localtime_s(tm, &time); + if (err) + { + *pDOS_date = 0; *pDOS_time = 0; + return; + } +#else + struct tm *tm = localtime(&time); +#endif + *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); + *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); + } +#endif + +#ifndef MINIZ_NO_STDIO + static mz_bool mz_zip_get_file_modified_time(const char *pFilename, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) + { +#ifdef MINIZ_NO_TIME + (void)pFilename; *pDOS_date = *pDOS_time = 0; +#else + struct MZ_FILE_STAT_STRUCT file_stat; + // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. 
+ if (MZ_FILE_STAT(pFilename, &file_stat) != 0) + return MZ_FALSE; + mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); +#endif // #ifdef MINIZ_NO_TIME + return MZ_TRUE; + } + +#ifndef MINIZ_NO_TIME + static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, time_t modified_time) + { + struct utimbuf t; t.actime = access_time; t.modtime = modified_time; + return !utime(pFilename, &t); + } +#endif // #ifndef MINIZ_NO_TIME +#endif // #ifndef MINIZ_NO_STDIO + + static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint32 flags) + { + (void)flags; + if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_READING; + pZip->m_archive_size = 0; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + return MZ_TRUE; + } + + static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) + { + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); + mz_uint 
l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; pR++; + } + return (pL == pE) ? (l_len < r_len) : (l < r); + } + +#define MZ_SWAP_UINT32(a, b) do { mz_uint32 t = a; a = b; b = t; } MZ_MACRO_END + + // Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) + static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip) + { + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + int start = (size - 2) >> 1, end; + while (start >= 0) + { + int child, root = start; + for ( ; ; ) + { + if ((child = (root << 1) + 1) >= size) + break; + child += (((child + 1) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]))); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + start--; + } + + end = size - 1; + while (end > 0) + { + int child, root = 0; + MZ_SWAP_UINT32(pIndices[end], pIndices[0]); + for ( ; ; ) + { + if ((child = (root << 1) + 1) >= end) + break; + child += (((child + 1) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1])); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], 
pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + end--; + } + } + + static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint32 flags) + { + mz_uint cdir_size, num_this_disk, cdir_disk_index; + mz_uint64 cdir_ofs; + mz_int64 cur_file_ofs; + const mz_uint8 *p; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); + // Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + // Find the end of central directory record by scanning the file from the end towards the beginning. + cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for ( ; ; ) + { + int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + for (i = n - 4; i >= 0; --i) + if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) + break; + if (i >= 0) + { + cur_file_ofs += i; + break; + } + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return MZ_FALSE; + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + // Read and verify the end of central directory record. 
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || + ((pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) + return MZ_FALSE; + + num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); + cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); + if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) + return MZ_FALSE; + + if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + + cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); + if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) + return MZ_FALSE; + + pZip->m_central_directory_file_ofs = cdir_ofs; + + if (pZip->m_total_files) + { + mz_uint i, n; + + // Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and another to hold the sorted indices. + if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE))) + return MZ_FALSE; + + if (sort_central_dir) + { + if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE)) + return MZ_FALSE; + } + + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size) + return MZ_FALSE; + + // Now create an index into the central directory file records, do some basic sanity checking on each record, and check for zip64 entries (which are not yet supported). 
+ p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; + for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) + { + mz_uint total_header_size, comp_size, decomp_size, disk_index; + if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) + return MZ_FALSE; + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); + if (sort_central_dir) + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || (comp_size == 0xFFFFFFFF)) + return MZ_FALSE; + disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); + if ((disk_index != num_this_disk) && (disk_index != 1)) + return MZ_FALSE; + if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) + return MZ_FALSE; + if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) + return MZ_FALSE; + n -= total_header_size; p += total_header_size; + } + } + + if (sort_central_dir) + mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); + + return MZ_TRUE; + } + + mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags) + { + if ((!pZip) || (!pZip->m_pRead)) + return MZ_FALSE; + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; + } + + static size_t mz_zip_mem_read_func(void *pOpaque, 
mz_uint64 file_ofs, void *pBuf, size_t n) + { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) ? 0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; + } + + mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags) + { + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; +#ifdef __cplusplus + pZip->m_pState->m_pMem = const_cast(pMem); +#else + pZip->m_pState->m_pMem = (void *)pMem; +#endif + pZip->m_pState->m_mem_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; + } + +#ifndef MINIZ_NO_STDIO + static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) + { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); + } + + mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) + { + mz_uint64 file_size; + MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) + return MZ_FALSE; + if (MZ_FSEEK64(pFile, 0, SEEK_END)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + file_size = MZ_FTELL64(pFile); + if (!mz_zip_reader_init_internal(pZip, flags)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; + } +#endif 
// #ifndef MINIZ_NO_STDIO + + mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) + { + return pZip ? pZip->m_total_files : 0; + } + + static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(mz_zip_archive *pZip, mz_uint file_index) + { + if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return NULL; + return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); + } + + mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index) + { + mz_uint m_bit_flag; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) + return MZ_FALSE; + m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + return (m_bit_flag & 1); + } + + mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index) + { + mz_uint filename_len, external_attr; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) + return MZ_FALSE; + + // First see if the filename ends with a '/' character. + filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_len) + { + if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') + return MZ_TRUE; + } + + // Bugfix: This code was also checking if the internal attribute was non-zero, which wasn't correct. + // Most/all zip writers (hopefully) set DOS file/directory attributes in the low 16-bits, so check for the DOS directory flag and ignore the source OS ID in the created by field. + // FIXME: Remove this check? Is it necessary - we already check the filename. 
+ external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + if ((external_attr & 0x10) != 0) + return MZ_TRUE; + + return MZ_FALSE; + } + + mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat) + { + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if ((!p) || (!pStat)) + return MZ_FALSE; + + // Unpack the central directory record. + pStat->m_file_index = file_index; + pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); + pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); + pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); + pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); +#ifndef MINIZ_NO_TIME + pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); +#endif + pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); + pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + + // Copy as much of the filename and comment as possible. 
+ n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); + memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pStat->m_filename[n] = '\0'; + + n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); + pStat->m_comment_size = n; + memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); pStat->m_comment[n] = '\0'; + + return MZ_TRUE; + } + + mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size) + { + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) { if (filename_buf_size) pFilename[0] = '\0'; return 0; } + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_buf_size) + { + n = MZ_MIN(n, filename_buf_size - 1); + memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pFilename[n] = '\0'; + } + return n + 1; + } + + static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags) + { + mz_uint i; + if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) + return 0 == memcmp(pA, pB, len); + for (i = 0; i < len; ++i) + if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) + return MZ_FALSE; + return MZ_TRUE; + } + + static MZ_FORCEINLINE int mz_zip_reader_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len) + { + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; pR++; + } + 
return (pL == pE) ? (int)(l_len - r_len) : (l - r); + } + + static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename) + { + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + const mz_uint filename_len = (mz_uint)strlen(pFilename); + int l = 0, h = size - 1; + while (l <= h) + { + int m = (l + h) >> 1, file_index = pIndices[m], comp = mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len); + if (!comp) + return file_index; + else if (comp < 0) + l = m + 1; + else + h = m - 1; + } + return -1; + } + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags) + { + mz_uint file_index; size_t name_len, comment_len; + if ((!pZip) || (!pZip->m_pState) || (!pName) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return -1; + if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) + return mz_zip_reader_locate_file_binary_search(pZip, pName); + name_len = strlen(pName); if (name_len > 0xFFFF) return -1; + comment_len = pComment ? 
strlen(pComment) : 0; if (comment_len > 0xFFFF) return -1; + for (file_index = 0; file_index < pZip->m_total_files; file_index++) + { + const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); + mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); + const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + if (filename_len < name_len) + continue; + if (comment_len) + { + mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); + const char *pFile_comment = pFilename + filename_len + file_extra_len; + if ((file_comment_len != comment_len) || (!mz_zip_reader_string_equal(pComment, pFile_comment, file_comment_len, flags))) + continue; + } + if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) + { + int ofs = filename_len - 1; + do + { + if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) + break; + } while (--ofs >= 0); + ofs++; + pFilename += ofs; filename_len -= ofs; + } + if ((filename_len == name_len) && (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) + return file_index; + } + return -1; + } + + mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) + { + int status = TINFL_STATUS_DONE; + mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + tinfl_decompressor inflator; + + if ((buf_size) && (!pBuf)) + return MZ_FALSE; + + if 
(!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) + return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) + return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) + return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Ensure supplied output buffer is large enough. + needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; + if (buf_size < needed_size) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + // The file is stored or the caller has requested the compressed data. 
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size) + return MZ_FALSE; + return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); + } + + // Decompress the file either directly from memory or from a file input buffer. + tinfl_init(&inflator); + + if (pZip->m_pState->m_pMem) + { + // Read directly from the archive in memory. + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else if (pUser_read_buf) + { + // Use a user provided read buffer. + if (!user_read_buf_size) + return MZ_FALSE; + pRead_buf = (mz_uint8 *)pUser_read_buf; + read_buf_size = user_read_buf_size; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + else + { + // Temporarily allocate a read buffer. + read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#endif + return MZ_FALSE; + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + do + { + size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, 
(mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + out_buf_ofs += out_buf_size; + } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); + + if (status == TINFL_STATUS_DONE) + { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + return status == TINFL_STATUS_DONE; + } + + mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) + { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size); + } + + mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags) + { + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0); + } + + mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags) + { + return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0); + } + + void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags) + { + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) + *pSize = 0; + if (!p) + return NULL; + + comp_size = 
MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#endif + return NULL; + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + return NULL; + + if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return NULL; + } + + if (pSize) *pSize = (size_t)alloc_size; + return pBuf; + } + + void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags) + { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + { + if (pSize) *pSize = 0; + return MZ_FALSE; + } + return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); + } + + mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) + { + int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT; + mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf = NULL; void *pWrite_buf = NULL; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) + return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir 
entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) + return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) + return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + // Decompress the file either directly from memory or from a file input buffer. + if (pZip->m_pState->m_pMem) + { + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + // The file is stored or the caller has requested the compressed data. 
+ if (pZip->m_pState->m_pMem) + { +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) +#endif + return MZ_FALSE; + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) + status = TINFL_STATUS_FAILED; + else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size); + cur_file_ofs += file_stat.m_comp_size; + out_buf_ofs += file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + while (comp_remaining) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + + if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + out_buf_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + } + } + } + else + { + tinfl_decompressor inflator; + tinfl_init(&inflator); + + if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) + status = TINFL_STATUS_FAILED; + else + { + do + { + mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + 
break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + + if (out_buf_size) + { + if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size) + { + status = TINFL_STATUS_FAILED; + break; + } + file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); + if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) + { + status = TINFL_STATUS_FAILED; + break; + } + } + } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT)); + } + } + + if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) + { + // Make sure the entire file was decompressed, and check its CRC. 
+ if ((out_buf_ofs != file_stat.m_uncomp_size) || (file_crc32 != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if (!pZip->m_pState->m_pMem) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + if (pWrite_buf) + pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); + + return status == TINFL_STATUS_DONE; + } + + mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) + { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags); + } + +#ifndef MINIZ_NO_STDIO + static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n) + { + (void)ofs; return MZ_FWRITE(pBuf, 1, n, (MZ_FILE*)pOpaque); + } + + mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags) + { + mz_bool status; + mz_zip_archive_file_stat file_stat; + MZ_FILE *pFile; + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + pFile = MZ_FOPEN(pDst_filename, "wb"); + if (!pFile) + return MZ_FALSE; + status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); + if (MZ_FCLOSE(pFile) == EOF) + return MZ_FALSE; +#ifndef MINIZ_NO_TIME + if (status) + mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); +#endif + return status; + } +#endif // #ifndef MINIZ_NO_STDIO + + mz_bool mz_zip_reader_end(mz_zip_archive *pZip) + { + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + + if (pZip->m_pState) + { + mz_zip_internal_state *pState = pZip->m_pState; pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + 
mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + } + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + + return MZ_TRUE; + } + +#ifndef MINIZ_NO_STDIO + mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags) + { + int file_index = mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); + } +#endif + + // ------------------- .ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + + static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); } + static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); p[2] = (mz_uint8)(v >> 16); p[3] = (mz_uint8)(v >> 24); } +#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) +#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) + + mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) + { + if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (pZip->m_file_offset_alignment) + { + // Ensure user specified file offset alignment is a power of 2. 
+ if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) + return MZ_FALSE; + } + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_archive_size = existing_size; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + return MZ_TRUE; + } + + static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) + { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); +#ifdef _MSC_VER + if ((!n) || ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#else + if ((!n) || ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#endif + return 0; + if (new_size > pState->m_mem_capacity) + { + void *pNew_block; + size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); while (new_capacity < new_size) new_capacity *= 2; + if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) + return 0; + pState->m_pMem = pNew_block; pState->m_mem_capacity = new_capacity; + } + memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); + pState->m_mem_size = (size_t)new_size; + return n; + } + + mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t 
initial_allocation_size) + { + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) + return MZ_FALSE; + if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) + { + if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size))) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_mem_capacity = initial_allocation_size; + } + return MZ_TRUE; + } + +#ifndef MINIZ_NO_STDIO + static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) + { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); + } + + mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning) + { + MZ_FILE *pFile; + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) + return MZ_FALSE; + if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_pFile = pFile; + if (size_to_reserve_at_beginning) + { + mz_uint64 cur_ofs = 0; char buf[4096]; MZ_CLEAR_OBJ(buf); + do + { + size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + cur_ofs += n; size_to_reserve_at_beginning -= n; + } while (size_to_reserve_at_beginning); + } + return MZ_TRUE; + } +#endif // #ifndef MINIZ_NO_STDIO + + mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename) + { + mz_zip_internal_state 
*pState; + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + // No sense in trying to write to an archive that's already at the support max size + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if (pState->m_pFile) + { +#ifdef MINIZ_NO_STDIO + pFilename; return MZ_FALSE; +#else + // Archive is being read from stdio - try to reopen as writable. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + if (!pFilename) + return MZ_FALSE; + pZip->m_pWrite = mz_zip_file_write_func; + if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) + { + // The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. + mz_zip_reader_end(pZip); + return MZ_FALSE; + } +#endif // #ifdef MINIZ_NO_STDIO + } + else if (pState->m_pMem) + { + // Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + } + // Archive is being read via a user provided read function - make sure the user has specified a write function too. + else if (!pZip->m_pWrite) + return MZ_FALSE; + + // Start writing new files at the archive's current central directory location. 
+ pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_central_directory_file_ofs = 0; + + return MZ_TRUE; + } + + mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags) + { + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); + } + + typedef struct + { + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; + } mz_zip_writer_add_state; + + static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void *pUser) + { + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) + return MZ_FALSE; + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; + } + + static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) + { + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; + } + + static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) + { + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); + return MZ_TRUE; + } + + static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) + { + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + // No zip64 support yet + if ((local_header_ofs > 0xFFFFFFFF) || (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + comment_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, extra_size, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) + return MZ_FALSE; + + if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, 
central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &central_dir_ofs, 1))) + { + // Try to push the central directory array back into its original state. + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; + } + + static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) + { + // Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. + if (*pArchive_name == '/') + return MZ_FALSE; + while (*pArchive_name) + { + if ((*pArchive_name == '\\') || (*pArchive_name == ':')) + return MZ_FALSE; + pArchive_name++; + } + return MZ_TRUE; + } + + static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) + { + mz_uint32 n; + if (!pZip->m_file_offset_alignment) + return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1); + } + + static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) + { + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) + { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return MZ_FALSE; + cur_file_ofs += s; n -= s; + } + return MZ_TRUE; + } + + mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 
uncomp_crc32) + { + mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return MZ_FALSE; + // No zip64 support yet + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + +#ifndef MINIZ_NO_TIME + { + time_t cur_time; time(&cur_time); + mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif // #ifndef MINIZ_NO_TIME + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) + { + // Set DOS Subdirectory attribute bit. 
+ ext_attributes |= 0x10; + // Subdirectories cannot contain data. + if ((buf_size) || (uncomp_size)) + return MZ_FALSE; + } + + // Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) + if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return MZ_FALSE; + + if ((!store_data_uncompressed) && (buf_size)) + { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return MZ_FALSE; + } + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) + { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + if (store_data_uncompressed) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + + if (level_and_flags & 
MZ_ZIP_FLAG_COMPRESSED_DATA) + method = MZ_DEFLATED; + } + else if (buf_size) + { + mz_zip_writer_add_state state; + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; + } + +#ifndef MINIZ_NO_STDIO + mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) + { + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = 
pZip->m_archive_size, uncomp_size = 0, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + MZ_FILE *pSrc_file = NULL; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) + return MZ_FALSE; + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) + return MZ_FALSE; + MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + if (uncomp_size > 0xFFFFFFFF) + { + // No zip64 support yet + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + if (uncomp_size <= 3) + level = 0; + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if 
(pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (uncomp_size) + { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + if (!level) + { + while (uncomp_remaining) + { + mz_uint n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } + else + { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + for ( ; ; ) + { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + + if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) + break; + + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + 
uncomp_remaining -= in_buf_size; + + status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) + { + result = MZ_TRUE; + break; + } + else if (status != TDEFL_STATUS_OKAY) + break; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + MZ_FCLOSE(pSrc_file); pSrc_file = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; + } +#endif // #ifndef MINIZ_NO_STDIO + + mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index) + { + mz_uint n, bit_flags, num_alignment_padding_bytes; + mz_uint64 comp_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 
central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; const mz_uint8 *pSrc_central_header; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + if (NULL == (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) + return MZ_FALSE; + pState = pZip->m_pState; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + cur_src_file_ofs = MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + cur_dst_file_ofs = pZip->m_archive_size; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) + return MZ_FALSE; + cur_dst_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + comp_bytes_remaining = n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, 
(size_t)MZ_MAX(sizeof(mz_uint32) * 4, MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining))))) + return MZ_FALSE; + + while (comp_bytes_remaining) + { + n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_dst_file_ofs += n; + + comp_bytes_remaining -= n; + } + + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) + { + // Copy data descriptor + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 
4 : 3); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + // no zip64 support yet + if (cur_dst_file_ofs > 0xFFFFFFFF) + return MZ_FALSE; + + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return MZ_FALSE; + + n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + if (pState->m_central_dir.m_size > 0xFFFFFFFF) + return MZ_FALSE; + n = (mz_uint32)orig_central_dir_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; + } + + mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) + { + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + + pState = pZip->m_pState; + + // no zip64 support yet + if ((pZip->m_total_files > 0xFFFF) || ((pZip->m_archive_size + pState->m_central_dir.m_size + 
MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) + { + // Write central directory + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) + return MZ_FALSE; + pZip->m_archive_size += central_dir_size; + } + + // Write end of central directory record + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, sizeof(hdr)) != sizeof(hdr)) + return MZ_FALSE; +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) + return MZ_FALSE; +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_archive_size += sizeof(hdr); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; + } + /* Finalizes an archive that is being written through mz_zip_heap_write_func and hands its memory block to the caller. pZip must have a non-NULL m_pState; pBuf and pSize are required out-parameters that receive m_pMem and m_mem_size. Returns MZ_FALSE, leaving the outputs untouched, when an argument is NULL, a different write callback is installed, or mz_zip_writer_finalize_archive() fails; returns MZ_TRUE otherwise. NOTE(review): presumably the caller later releases the returned block with the archive's m_pFree - confirm against the allocator used by mz_zip_heap_write_func. */ + mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize) + { + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) + return MZ_FALSE; + if (pZip->m_pWrite != mz_zip_heap_write_func) /* refuse archives not written through the heap callback */ + return MZ_FALSE; + if (!mz_zip_writer_finalize_archive(pZip)) + return MZ_FALSE; + + *pBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; /* detach the block: mz_zip_writer_end() frees a non-NULL m_pMem */ + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + return MZ_TRUE; + } + + mz_bool mz_zip_writer_end(mz_zip_archive *pZip) + { + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + if
((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + return MZ_FALSE; + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; + } + +#ifndef MINIZ_NO_STDIO + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) + { + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + MZ_CLEAR_OBJ(zip_archive); + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) + { + // Create a new archive. + if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) + return MZ_FALSE; + created_new_archive = MZ_TRUE; + } + else + { + // Append to an existing archive. 
+ if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return MZ_FALSE; + if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) + { + mz_zip_reader_end(&zip_archive); + return MZ_FALSE; + } + } + status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0); + // Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) + if (!mz_zip_writer_finalize_archive(&zip_archive)) + status = MZ_FALSE; + if (!mz_zip_writer_end(&zip_archive)) + status = MZ_FALSE; + if ((!status) && (created_new_archive)) + { + // It's a new archive and something went wrong, so just delete it. + int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + return status; + } + /* Opens the zip file pZip_filename, looks up the entry pArchive_name and returns whatever mz_zip_reader_extract_to_heap() yields for it. pSize is optional; when given it is zeroed up front and then passed to the extractor. flags is forwarded to the locate and extract calls and, OR-ed with MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, to the reader init. Returns NULL when either name is NULL, the archive cannot be opened, or the entry is not located (index < 0). The reader is closed before returning on every path that opened it. */ + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags) + { + int file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) + *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) + return NULL; + + MZ_CLEAR_OBJ(zip_archive); + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return NULL; + + if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, flags)) >= 0) /* negative index: entry not found, p stays NULL */ + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + + mz_zip_reader_end(&zip_archive); + return p; + } + +#endif // #ifndef MINIZ_NO_STDIO + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_FILE_ONLY + +/* + This is free and unencumbered software released into the public domain.
+ + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to <http://unlicense.org/> + */ diff --git a/src/miniz.h b/src/miniz.h new file mode 100644 index 0000000..e420e58 --- /dev/null +++ b/src/miniz.h @@ -0,0 +1,19 @@ +/** @file miniz.h + * @brief header file for third party miniz.c, zlib replacement + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See <http://www.gnu.org/licenses/> + */ + +#ifndef libmobi_miniz_h +#define libmobi_miniz_h + +#define MINIZ_HEADER_FILE_ONLY +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#include "miniz.c" + +#endif diff --git a/src/mobi.h b/src/mobi.h index 3cd7bdc..5eed9ff 100644 --- a/src/mobi.h +++ b/src/mobi.h @@ -1,253 +1,522 @@ -// -// mobi.h -// libmobi -// -// Created by Bartek on 24.03.14. -// Copyright (c) 2014 Bartek. All rights reserved.
-// +/** @file mobi.h + * @brief Libmobi main header file + * + * This file is installed with the library. + * Include it in your project with "#include <mobi.h>". + * See example of usage in mobitool.c. + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See <http://www.gnu.org/licenses/> + */ #ifndef libmobi_mobi_h #define libmobi_mobi_h +#include #include -#include "buffer.h" -#include "compression.h" -#include "debug.h" - -#define MOBI_ERROR -1 -#define MOBI_SUCCESS 0 - -#define MOBI_USE_KF8 1 -#define MOBI_USE_KF7 0 - -#define EPOCH_MAC_DIFF 2082844800 -#define PALMDB_HEADER_LEN 78 -#define PALMDB_NAME_SIZE_MAX 32 -#define PALMDB_ATTRIBUTE_DEFAULT 0 -#define PALMDB_VERSION_DEFAULT 0 -#define PALMDB_MODNUM_DEFAULT 0 -#define PALMDB_APPINFO_DEFAULT 0 -#define PALMDB_SORTINFO_DEFAULT 0 -#define PALMDB_TYPE_DEFAULT "BOOK" -#define PALMDB_CREATOR_DEFAULT "MOBI" -#define PALMDB_NEXTREC_DEFAULT 0 - -#define RECORD0_HEADER_LEN 16 -#define RECORD0_NO_COMPRESSION 1 -#define RECORD0_PALMDOC_COMPRESSION 2 -#define RECORD0_HUFF_COMPRESSION 17480 -#define RECORD0_RECORD_SIZE_MAX 4096 -#define RECORD0_NO_ENCRYPTION 0 -#define RECORD0_OLD_ENCRYPTION 1 -#define RECORD0_MOBI_ENCRYPTION 2 - -#define PDB_RECORD_INFO_SIZE 8 - -#define MOBI_MAGIC "MOBI" -#define EXTH_MAGIC "EXTH" -#define HUFF_MAGIC "HUFF" -#define CDIC_MAGIC "CDIC" - -#define CDIC_HEADER_LEN 16 -#define HUFF_HEADER_LEN 24 -#define HUFF_RECORD_MINSIZE 2584 - -// EXTH -#define DRM_SERVER_ID 1 -#define DRM_COMMERCE_ID 2 -#define DRM_EBOOKBASE_BOOK_ID 3 - -#define MOBI_EXTH_AUTHOR 100 // -#define MOBI_EXTH_PUBLISHER 101 // -#define MOBI_EXTH_IMPRINT 102 // -#define MOBI_EXTH_DESCRIPTION 103 // -#define MOBI_EXTH_ISBN 104 // -#define MOBI_EXTH_SUBJECT 105 // -#define MOBI_EXTH_PUBLISHINGDATE 106 // -#define MOBI_EXTH_REVIEW 107 // -#define MOBI_EXTH_CONTRIBUTOR 108 // -#define MOBI_EXTH_RIGHTS 109 // -#define MOBI_EXTH_SUBJECTCODE 110
// -#define MOBI_EXTH_TYPE 111 // -#define MOBI_EXTH_SOURCE 112 // -#define MOBI_EXTH_ASIN 113 -#define MOBI_EXTH_VERSION 114 -#define MOBI_EXTH_SAMPLE 115 -#define MOBI_EXTH_STARTREADING 116 -#define MOBI_EXTH_ADULT 117 // -#define MOBI_EXTH_PRICE 118 // -#define MOBI_EXTH_PRICECURRENCY 119 // -#define MOBI_EXTH_KF8BOUNDARY 121 -#define MOBI_EXTH_COUNTRESOURCES 125 -#define MOBI_EXTH_KF8OVERURI 129 - -#define MOBI_EXTH_DICTNAME 200 // -#define MOBI_EXTH_COVEROFFSET 201 // -#define MOBI_EXTH_THUMBOFFSET 202 -#define MOBI_EXTH_HASFAKECOVER 203 -#define MOBI_EXTH_CREATORSOFT 204 -#define MOBI_EXTH_CREATORMAJOR 205 -#define MOBI_EXTH_CREATORMINOR 206 -#define MOBI_EXTH_CREATORBUILD 207 -#define MOBI_EXTH_WATERMARK 208 -#define MOBI_EXTH_TAMPERKEYS 209 - -#define MOBI_EXTH_FONTSIGNATURE 300 - -#define MOBI_EXTH_CLIPPINGLIMIT 401 -#define MOBI_EXTH_PUBLISHERLIMIT 402 -#define MOBI_EXTH_TTS 404 -#define MOBI_EXTH_RENAL 405 -#define MOBI_EXTH_RENALEXPIRE 406 - -#define MOBI_EXTH_CDETYPE 501 -#define MOBI_EXTH_LASTUPDATE 502 -#define MOBI_EXTH_UPDATEDTITLE 503 -#define MOBI_EXTH_LANGUAGE 524 // -#define MOBI_EXTH_ALIGNMENT 525 -#define MOBI_EXTH_CREATORBUILD2 535 - +#include +#include +/** @brief Visibility attributes for symbol export */ +#if defined (__CYGWIN__) || defined (__MINGW32__) +#define MOBI_EXPORT __attribute__((visibility("default"))) __declspec(dllexport) extern +#else +#define MOBI_EXPORT __attribute__((__visibility__("default"))) +#endif +#ifdef __cplusplus +extern "C" +{ +#endif + /** + @defgroup mobi_enums Exported enums + @{ + */ + + /** + @brief Error codes returned by functions + */ + typedef enum { + MOBI_SUCCESS = 0, /**< Generic success return value */ + MOBI_ERROR = 1, /**< Generic error return value */ + MOBI_PARAM_ERR = 2, /**< Wrong function parameter */ + MOBI_DATA_CORRUPT = 3, /**< Corrupted data */ + MOBI_FILE_NOT_FOUND = 4, /**< File not found */ + MOBI_FILE_ENCRYPTED = 5, /**< Unsupported encrypted data */ + MOBI_FILE_UNSUPPORTED = 6, /**< 
Unsupported document type */ + MOBI_MALLOC_FAILED = 7, /**< Memory allocation error */ + MOBI_INIT_FAILED = 8, /**< Initialization error */ + MOBI_BUFFER_END = 9, /**< Out of buffer error */ + MOBI_XML_ERR = 10, /**< LibXML2 error */ + } MOBI_RET; + + /** + @brief EXTH record types + */ + typedef enum { + EXTH_NUMERIC = 0, + EXTH_STRING = 1, + EXTH_BINARY = 2 + } MOBIExthType; + + /** + @brief EXTH record tags + */ + typedef enum { + EXTH_DRMSERVER = 1, + EXTH_DRMCOMMERCE = 2, + EXTH_DRMEBOOKBASE = 3, + + EXTH_TITLE = 99, /**< */ + EXTH_AUTHOR = 100, /**< */ + EXTH_PUBLISHER = 101, /**< */ + EXTH_IMPRINT = 102, /**< */ + EXTH_DESCRIPTION = 103, /**< */ + EXTH_ISBN = 104, /**< */ + EXTH_SUBJECT = 105, /**< */ + EXTH_PUBLISHINGDATE = 106, /**< */ + EXTH_REVIEW = 107, /**< */ + EXTH_CONTRIBUTOR = 108, /**< */ + EXTH_RIGHTS = 109, /**< */ + EXTH_SUBJECTCODE = 110, /**< */ + EXTH_TYPE = 111, /**< */ + EXTH_SOURCE = 112, /**< */ + EXTH_ASIN = 113, + EXTH_VERSION = 114, + EXTH_SAMPLE = 115, + EXTH_STARTREADING = 116, /**< Start reading */ + EXTH_ADULT = 117, /**< */ + EXTH_PRICE = 118, /**< */ + EXTH_CURRENCY = 119, /**< */ + EXTH_KF8BOUNDARY = 121, + EXTH_FIXEDLAYOUT = 122, /**< */ + EXTH_BOOKTYPE = 123, /**< */ + EXTH_ORIENTATIONLOCK = 124, /**< */ + EXTH_COUNTRESOURCES = 125, + EXTH_ORIGRESOLUTION = 126, /**< */ + EXTH_ZEROGUTTER = 127, /**< */ + EXTH_ZEROMARGIN = 128, /**< */ + EXTH_KF8COVERURI = 129, + EXTH_RESCOFFSET = 131, + EXTH_REGIONMAGNI = 132, /**< */ + + EXTH_DICTNAME = 200, /**< */ + EXTH_COVEROFFSET = 201, /**< */ + EXTH_THUMBOFFSET = 202, + EXTH_HASFAKECOVER = 203, + EXTH_CREATORSOFT = 204, + EXTH_CREATORMAJOR = 205, + EXTH_CREATORMINOR = 206, + EXTH_CREATORBUILD = 207, + EXTH_WATERMARK = 208, + EXTH_TAMPERKEYS = 209, + + EXTH_FONTSIGNATURE = 300, + + EXTH_CLIPPINGLIMIT = 401, + EXTH_PUBLISHERLIMIT = 402, + EXTH_UNK403 = 403, + EXTH_TTSDISABLE = 404, + EXTH_UNK405 = 405, + EXTH_RENTAL = 406, + EXTH_UNK407 = 407, + EXTH_UNK450 = 450, + EXTH_UNK451 = 451, + 
EXTH_UNK452 = 452, + EXTH_UNK453 = 453, + + EXTH_DOCTYPE = 501, /**< PDOC - Personal Doc; EBOK - ebook; EBSP - ebook sample; */ + EXTH_LASTUPDATE = 502, + EXTH_UPDATEDTITLE = 503, + EXTH_ASIN504 = 504, + EXTH_TITLEFILEAS = 508, + EXTH_CREATORFILEAS = 517, + EXTH_PUBLISHERFILEAS = 522, + EXTH_LANGUAGE = 524, /**< */ + EXTH_ALIGNMENT = 525, /**< */ + EXTH_PAGEDIR = 527, + EXTH_OVERRIDEFONTS = 528, /**< */ + EXTH_SORCEDESC = 529, + EXTH_UNK534 = 534, + EXTH_CREATORBUILDREV = 535, + } MOBIExthTag; + + /** + @brief Types of files stored in database records + */ + typedef enum { + T_UNKNOWN, /**< unknown */ + /* markup */ + T_HTML, /**< html */ + T_CSS, /**< css */ + T_SVG, /**< svg */ + T_OPF, /**< opf */ + T_NCX, /**< ncx */ + /* images */ + T_JPG, /**< jpg */ + T_GIF, /**< gif */ + T_PNG, /**< png */ + T_BMP, /**< bmp */ + /* fonts */ + T_OTF, /**< otf */ + T_TTF, /**< ttf */ + /* media */ + T_MP3, /**< mp3 */ + T_MPG, /**< mpg */ + T_PDF, /**< pdf */ + /* generic types */ + T_FONT, /**< encoded font */ + T_AUDIO, /**< audio resource */ + T_VIDEO, /**< video resource */ + T_BREAK /**< end of file */ + } MOBIFiletype; + + /** + @brief Metadata of file types + */ + typedef struct { + MOBIFiletype type; /**< MOBIFiletype type */ + char extension[5]; /**< file extension */ + char mime_type[30]; /**< mime-type */ + } MOBIFileMeta; + + /** @} */ + + /** + @defgroup raw_structs Exported structures for the raw, unparsed records metadata and data + @{ + */ + + /** + @brief Parsed data from HUFF and CDIC records needed to unpack huffman compressed text + */ + typedef struct { + size_t index_count; /**< Total number of indices in all CDIC records, stored in each CDIC record header */ + size_t index_read; /**< Number of indices parsed, used by parser */ + size_t code_length; /**< Code length value stored in CDIC record header */ + uint32_t table1[256]; /**< Table of big-endian indices from HUFF record data1 */ + uint32_t mincode_table[33]; /**< Table of big-endian mincodes from
HUFF record data2 */ + uint32_t maxcode_table[33]; /**< Table of big-endian maxcodes from HUFF record data2 */ + uint16_t *symbol_offsets; /**< Index of symbol offsets parsed from CDIC records (index_count entries) */ + unsigned char **symbols; /**< Array of pointers to start of symbols data in each CDIC record (index = number of CDIC record) */ + } MOBIHuffCdic; + /** + @brief Header of palmdoc database file + */ + typedef struct { + char name[33]; /**< 0: Database name, zero terminated, trimmed title (+author) */ + uint16_t attributes; /**< 32: Attributes bitfield, PALMDB_ATTRIBUTE_DEFAULT */ + uint16_t version; /**< 34: File version, PALMDB_VERSION_DEFAULT */ + uint32_t ctime; /**< 36: Creation time */ + uint32_t mtime; /**< 40: Modification time */ + uint32_t btime; /**< 44: Backup time */ + uint32_t mod_num; /**< 48: Modification number, PALMDB_MODNUM_DEFAULT */ + uint32_t appinfo_offset; /**< 52: Offset to application info (if present) or zero, PALMDB_APPINFO_DEFAULT */ + uint32_t sortinfo_offset; /**< 56: Offset to sort info (if present) or zero, PALMDB_SORTINFO_DEFAULT */ + char type[5]; /**< 60: Database type, zero terminated, PALMDB_TYPE_DEFAULT */ + char creator[5]; /**< 64: Creator type, zero terminated, PALMDB_CREATOR_DEFAULT */ + uint32_t uid; /**< 68: Used internally to identify record */ + uint32_t next_rec; /**< 72: Used only when database is loaded into memory, PALMDB_NEXTREC_DEFAULT */ + uint16_t rec_count; /**< 76: Number of records in the file */ + } MOBIPdbHeader; -typedef struct { - char name[PALMDB_NAME_SIZE_MAX + 1]; // zero terminated, trimmed title+author - uint16_t attributes; // PALMDB_ATTRIBUTE_DEFAULT - uint16_t version; // PALMDB_VERSION_DEFAULT - uint32_t ctime; // creation time - uint32_t mtime; // modification time - uint32_t btime; // backup time - uint32_t mod_num; // PALMDB_MODNUM_DEFAULT - uint32_t appinfo_offset; // PALMDB_APPINFO_DEFAULT - uint32_t sortinfo_offset; // PALMDB_SORTINFO_DEFAULT - char type[5]; // 
PALMDB_TYPE_DEFAULT - char creator[5]; // PALMDB_CREATOR_DEFAULT - uint32_t uid; // used internally to identify record - uint32_t next_rec; // PALMDB_NEXTREC_DEFAULT - uint16_t rec_count; // number of records in the file -} MOBIPdbHeader; + /** + @brief Metadata and data of a record. All records form a linked list. + */ + typedef struct MOBIPdbRecord { + uint32_t offset; /**< Offset of the record data from the start of the database */ + size_t size; /**< Calculated size of the record data */ + uint8_t attributes; /**< Record attributes */ + uint32_t uid; /**< Record unique id, usually sequential even numbers */ + unsigned char *data; /**< Record data */ + struct MOBIPdbRecord *next; /**< Pointer to the next record or NULL */ + } MOBIPdbRecord; + /** + @brief Metadata and data of a EXTH record. All records form a linked list. + */ + typedef struct MOBIExthHeader { + uint32_t tag; /**< Record tag */ + uint32_t size; /**< Data size */ + void *data; /**< Record data */ + struct MOBIExthHeader *next; /**< Pointer to the next record or NULL */ + } MOBIExthHeader; + + /** + @brief EXTH tag metadata + */ + typedef struct { + MOBIExthTag tag; /**< Record tag id */ + MOBIExthType type; /**< EXTH_NUMERIC, EXTH_STRING or EXTH_BINARY */ + char *name; /**< Tag name */ + } MOBIExthMeta; + + /** + @brief Header of the Record 0 meta-record + */ + typedef struct { + /* PalmDOC header (extended), offset 0, length 16 */ + uint16_t compression_type; /**< 0; 1 == no compression, 2 = PalmDOC compression, 17480 = HUFF/CDIC compression */ + /* uint16_t unused; // 2; 0 */ + uint32_t text_length; /**< 4; uncompressed length of the entire text of the book */ + uint16_t text_record_count; /**< 8; number of PDB records used for the text of the book */ + uint16_t text_record_size; /**< 10; maximum size of each record containing text, always 4096 */ + uint16_t encryption_type; /**< 12; 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption */ + uint16_t unknown1; /**< 14; 
usually 0 */ + } MOBIRecord0Header; + /** + @brief MOBI header which follows Record 0 header + + All MOBI header fields are pointers. Some fields are not present in the header, then the pointer is NULL. + */ + typedef struct { + /* MOBI header, offset 16 */ + char mobi_magic[5]; /**< 16: M O B I { 77, 79, 66, 73 }, zero terminated */ + uint32_t *header_length; /**< 20: the length of the MOBI header, including the previous 4 bytes */ + uint32_t *mobi_type; /**< 24: mobipocket file type */ + uint32_t *text_encoding; /**< 28: 1252 = CP1252, 65001 = UTF-8 */ + uint32_t *uid; /**< 32: unique id */ + uint32_t *version; /**< 36: mobipocket format */ + uint32_t *orth_index; /**< 40: section number of orthographic meta index. MOBI_NOTSET if index is not available. */ + uint32_t *infl_index; /**< 44: section number of inflection meta index. MOBI_NOTSET if index is not available. */ + uint32_t *names_index; /**< 48: section number of names meta index. MOBI_NOTSET if index is not available. */ + uint32_t *keys_index; /**< 52: section number of keys meta index. MOBI_NOTSET if index is not available. */ + uint32_t *extra0_index; /**< 56: section number of extra 0 meta index. MOBI_NOTSET if index is not available. */ + uint32_t *extra1_index; /**< 60: section number of extra 1 meta index. MOBI_NOTSET if index is not available. */ + uint32_t *extra2_index; /**< 64: section number of extra 2 meta index. MOBI_NOTSET if index is not available. */ + uint32_t *extra3_index; /**< 68: section number of extra 3 meta index. MOBI_NOTSET if index is not available. */ + uint32_t *extra4_index; /**< 72: section number of extra 4 meta index. MOBI_NOTSET if index is not available. */ + uint32_t *extra5_index; /**< 76: section number of extra 5 meta index. MOBI_NOTSET if index is not available. 
*/ + uint32_t *non_text_index; /**< 80: first record number (starting with 0) that's not the book's text */ + uint32_t *full_name_offset; /**< 84: offset in record 0 (not from start of file) of the full name of the book */ + uint32_t *full_name_length; /**< 88: length of the full name */ + uint32_t *locale; /**< 92: first byte is main language: 09 = English, next byte is dialect, 08 = British, 04 = US */ + uint32_t *dict_input_lang; /**< 96: input language for a dictionary */ + uint32_t *dict_output_lang; /**< 100: output language for a dictionary */ + uint32_t *min_version; /**< 104: minimum mobipocket version support needed to read this file. */ + uint32_t *image_index; /**< 108: first record number (starting with 0) that contains an image (sequential) */ + uint32_t *huff_rec_index; /**< 112: first huffman compression record */ + uint32_t *huff_rec_count; /**< 116: huffman compression records count */ + uint32_t *datp_rec_index; /**< 120: section number of DATP record */ + uint32_t *datp_rec_count; /**< 124: DATP records count */ + uint32_t *exth_flags; /**< 128: bitfield. if bit 6 (0x40) is set, then there's an EXTH record */ + /* 32 unknown bytes 0? */ + /* unknown2 */ + /* unknown3 */ + /* unknown4 */ + /* unknown5 */ + uint32_t *unknown6; /**< 164: use MOBI_NOTSET */ + uint32_t *drm_offset; /**< 168: offset to DRM key info in DRMed files. MOBI_NOTSET if no DRM */ + uint32_t *drm_count; /**< 172: number of entries in DRM info */ + uint32_t *drm_size; /**< 176: number of bytes in DRM info */ + uint32_t *drm_flags; /**< 180: some flags concerning DRM info */ + /* 8 unknown bytes 0? 
*/ + /* unknown7 */ + /* unknown8 */ + uint16_t *first_text_index; /**< 192: section number of first text record */ + uint16_t *last_text_index; /**< 194: */ + uint32_t *fdst_index; /**< 192 (KF8) section number of FDST record */ + //uint32_t *unknown9; /**< 196: */ + uint32_t *fdst_section_count; /**< 196 (KF8) */ + uint32_t *fcis_index; /**< 200: section number of FCIS record */ + uint32_t *fcis_count; /**< 204: FCIS records count */ + uint32_t *flis_index; /**< 208: section number of FLIS record */ + uint32_t *flis_count; /**< 212: FLIS records count */ + uint32_t *unknown10; /**< 216: */ + uint32_t *unknown11; /**< 220: */ + uint32_t *srcs_index; /**< 224: section number of SRCS record */ + uint32_t *srcs_count; /**< 228: SRCS records count */ + uint32_t *unknown12; /**< 232: */ + uint32_t *unknown13; /**< 236: */ + /* uint16_t fill 0 */ + uint16_t *extra_flags; /**< 242: extra flags */ + uint32_t *ncx_index; /**< 244: section number of NCX record */ + uint32_t *unknown14; /**< 248: */ + uint32_t *fragment_index; /**< 248 (KF8) section number of fragments record */ + uint32_t *unknown15; /**< 252: */ + uint32_t *skeleton_index; /**< 252 (KF8) section number of SKEL record */ + uint32_t *datp_index; /**< 256: section number of DATP record */ + uint32_t *unknown16; /**< 260: */ + uint32_t *guide_index; /**< 260 (KF8) section number of guide record */ + uint32_t *unknown17; /**< 264: */ + uint32_t *unknown18; /**< 268: */ + uint32_t *unknown19; /**< 272: */ + uint32_t *unknown20; /**< 276: */ + } MOBIMobiHeader; -typedef struct pdb_record { - size_t offset; - size_t size; - uint8_t attributes; - uint32_t uid; - char *data; - struct pdb_record *next; -} MOBIPdbRecord; + /** + @brief Main structure holding all metadata and unparsed records data + + In case of hybrid KF7/KF8 file there are two Records 0. + In such case MOBIData is a circular linked list of two independent records, one structure per each Record 0 header. 
+ Records data (MOBIPdbRecord structure) is not duplicated in such case - each struct holds same pointers to all records data. + */ + typedef struct MOBIData { + bool use_kf8; /**< Flag: if set to true (default), KF8 part of hybrid file is parsed, if false - KF7 part will be parsed */ + uint32_t kf8_boundary_offset; /**< Set to KF8 boundary rec number if present, otherwise: MOBI_NOTSET */ + MOBIPdbHeader *ph; /**< Palmdoc database header structure or NULL if not loaded */ + MOBIRecord0Header *rh; /**< Record0 header structure or NULL if not loaded */ + MOBIMobiHeader *mh; /**< MOBI header structure or NULL if not loaded */ + MOBIExthHeader *eh; /**< Linked list of EXTH records or NULL if not loaded */ + MOBIPdbRecord *rec; /**< Linked list of palmdoc database records or NULL if not loaded */ + struct MOBIData *next; /**< Pointer to the other part of hybrid file or NULL if not a hybrid file */ + } MOBIData; + + /** @} */ // end of raw_structs group -typedef struct exth { - int uid; - size_t size; - void *data; - struct exth *next; -} MOBIExtHeader; + /** + @defgroup parsed_structs Exported structures for the parsed records metadata and data + @{ + */ + + /** + @brief Parsed FDST record + + FDST record contains offsets of main sections in RAWML - raw text data. + The sections are usually html part, css parts, svg part. 
+ */ + typedef struct { + size_t fdst_section_count; /**< Number of main sections */ + uint32_t *fdst_section_starts; /**< Array of section start offsets */ + uint32_t *fdst_section_ends; /**< Array of section end offsets */ + } MOBIFdst; -typedef struct { - // PalmDOC header (extended), offset 0, length 16 - uint16_t compression_type; // 0; 1 == no compression, 2 = PalmDOC compression, 17480 = HUFF/CDIC compression - //uint16_t unused; // 2; 0 - uint32_t text_length; // 4; uncompressed length of the entire text of the book - uint16_t text_record_count; // 8; number of PDB records used for the text of the book - uint16_t text_record_size; // 10; maximum size of each record containing text, always 4096 - uint16_t encryption_type; // 12; 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption - uint16_t unknown1; // 14; usually 0 -} MOBIRecord0Header; + /** + @brief Maximum value of tag values in index entry (MOBIIndexTag) + FIXME: is 2 enough? + */ +#define MOBI_INDX_MAXTAGVALUES 2 + + /** + @brief Parsed tag for an index entry + */ + typedef struct { + size_t tagid; /**< Tag id */ + size_t tagvalues_count; /**< Number of tag values */ + uint32_t tagvalues[MOBI_INDX_MAXTAGVALUES]; /**< Array of tag values */ + } MOBIIndexTag; -typedef struct { - // MOBI header, offset 16 - char mobi_magic[5]; // 16: M O B I { 77, 79, 66, 73 } - uint32_t *header_length; // 20: the length of the MOBI header, including the previous 4 bytes - uint32_t *mobi_type; // 24: mobipocket file type - uint32_t *text_encoding; // 28: 1252 = CP1252, 65001 = UTF-8 - uint32_t *uid; // 32: unique id - uint32_t *file_version; // 36: mobipocket format - uint32_t *orth_index; // 40: section number of orthographic meta index. 0xFFFFFFFF if index is not available. - uint32_t *infl_index; // 44: section number of inflection meta index. 0xFFFFFFFF if index is not available. - uint32_t *names_index; // 48: section number of names meta index. 0xFFFFFFFF if index is not available. 
- uint32_t *keys_index; // 52: section number of keys meta index. 0xFFFFFFFF if index is not available. - uint32_t *extra0_index; // 56: section number of extra 0 meta index. 0xFFFFFFFF if index is not available. - uint32_t *extra1_index; // 60: section number of extra 1 meta index. 0xFFFFFFFF if index is not available. - uint32_t *extra2_index; // 64: section number of extra 2 meta index. 0xFFFFFFFF if index is not available. - uint32_t *extra3_index; // 68: section number of extra 3 meta index. 0xFFFFFFFF if index is not available. - uint32_t *extra4_index; // 72: section number of extra 4 meta index. 0xFFFFFFFF if index is not available. - uint32_t *extra5_index; // 76: section number of extra 5 meta index. 0xFFFFFFFF if index is not available. - uint32_t *non_text_index; // 80: first record number (starting with 0) that's not the book's text - uint32_t *full_name_offset; // 84: offset in record 0 (not from start of file) of the full name of the book - uint32_t *full_name_length; // 88: - uint32_t *locale; // 92: low byte is main language 09= English, next byte is dialect, 08 = British, 04 = US - uint32_t *input_lang; // 96: input language for a dictionary - uint32_t *output_lang; // 100: output language for a dictionary - uint32_t *min_version; // 104: minimum mobipocket version support needed to read this file. - uint32_t *image_index; // 108: first record number (starting with 0) that contains an image (sequential) - uint32_t *huff_rec_index; // 112: first huffman compression record. - uint32_t *huff_rec_count; // 116: - uint32_t *huff_table_offset; // 120: - uint32_t *huff_table_length; // 124: - uint32_t *exth_flags; // 128: bitfield. if bit 6 (0x40) is set, then there's an EXTH record - // 32 unknown bytes 0? - // unknown2 - // unknown3 - // unknown4 - // unknown5 - uint32_t *unknown6; // 164: use 0xFFFFFFFF - uint32_t *drm_offset; // 168: offset to DRM key info in DRMed files. 
0xFFFFFFFF if no DRM - uint32_t *drm_count; // 172: number of entries in DRM info - uint32_t *drm_size; // 176: number of bytes in DRM info - uint32_t *drm_flags; // 180: some flags concerning the DRM info - // 8 unknown bytes 0? - // unknown7 - // unknown8 - uint16_t *first_text_index; // 192: - uint16_t *last_text_index; // 194: - uint32_t *unknown9; // 196: - uint32_t *fcis_index; // 200: - uint32_t *fcis_count; // 204: - uint32_t *flis_index; // 208: - uint32_t *flis_count; // 212: - uint32_t *unknown10; // 216: - uint32_t *unknown11; // 220: - uint32_t *srcs_index; // 224: - uint32_t *srcs_count; // 228: - uint32_t *unknown12; // 232: - uint32_t *unknown13; // 236: - // uint16_t fill 0 - uint16_t *extra_flags; // 242: - uint32_t *ncx_index; // 244: - uint32_t *unknown14; // 248: - uint32_t *unknown15; // 252: - uint32_t *datp_index; // 256: - uint32_t *unknown16; // 260: - uint32_t *unknown17; // 264: - uint32_t *unknown18; // 268: - uint32_t *unknown19; // 272: - uint32_t *unknown20; // 276: -} MOBIMobiHeader; + /** + @brief Parsed INDX index entry + */ + typedef struct { + char *label; /**< Entry string, zero terminated */ + size_t tags_count; /**< Number of tags */ + MOBIIndexTag *tags; /**< Array of tags */ + } MOBIIndexEntry; -typedef struct m { - uint8_t use_kf8; - MOBIPdbHeader *ph; - MOBIRecord0Header *rh; - MOBIMobiHeader *mh; - MOBIExtHeader *eh; - MOBIPdbRecord *rec; - struct m *next; -} MOBIData; + /** + @brief Parsed INDX record + */ + typedef struct { + size_t type; /**< Index type: 0 - normal, 2 - inflection */ + size_t entries_count; /**< Index entries count */ + size_t encoding; /**< Index encoding */ + size_t total_entries_count; /**< Total index entries count */ + size_t ordt_offset; /**< ORDT offset */ + size_t ligt_offset; /**< LIGT offset */ + size_t ordt_entries_count; /**< ORDT index entries count */ + size_t cncx_records_count; /**< Number of compiled NCX records */ + MOBIPdbRecord *cncx_record; /**< Link to CNCX record */ + 
MOBIIndexEntry *entries; /**< Index entries array */ + } MOBIIndx; + + /** + @brief Reconstructed source file. + + All file parts are organized in a linked list. + */ + typedef struct MOBIPart { + size_t uid; /**< Unique id */ + MOBIFiletype type; /**< File type */ + size_t size; /**< File size */ + unsigned char *data; /**< File data */ + struct MOBIPart *next; /**< Pointer to next part or NULL */ + } MOBIPart; + + /** + @brief Main structure containing reconstructed source parts and indices + */ + typedef struct { + size_t version; /**< Version of Mobipocket document */ + MOBIFdst *fdst; /**< Parsed FDST record or NULL if not present */ + MOBIIndx *skel; /**< Parsed skeleton index or NULL if not present */ + MOBIIndx *frag; /**< Parsed fragments index or NULL if not present */ + MOBIIndx *guide; /**< Parsed guide index or NULL if not present */ + MOBIIndx *ncx; /**< Parsed NCX index or NULL if not present */ + MOBIIndx *orth; /**< Parsed orth index or NULL if not present */ + MOBIPart *flow; /**< Linked list of reconstructed main flow parts or NULL if not present */ + MOBIPart *markup; /**< Linked list of reconstructed markup files or NULL if not present */ + MOBIPart *resources; /**< Linked list of reconstructed resources files or NULL if not present */ + } MOBIRawml; -void write_mobi(void); -int mobi_load_file(MOBIData *m, FILE *file); -int mobi_load_filename(MOBIData *m, const char *path); -MOBIData * mobi_init(); -void mobi_free(MOBIData *m); + /** @} */ // end of parsed_structs group + + /** + @defgroup mobi_export Functions exported by the library + @{ + */ + MOBI_EXPORT const char * mobi_version(void); + MOBI_EXPORT MOBI_RET mobi_load_file(MOBIData *m, FILE *file); + MOBI_EXPORT MOBI_RET mobi_load_filename(MOBIData *m, const char *path); + + MOBI_EXPORT MOBIData * mobi_init(); + MOBI_EXPORT void mobi_free(MOBIData *m); + + MOBI_EXPORT MOBI_RET mobi_parse_kf7(MOBIData *m); + MOBI_EXPORT MOBI_RET mobi_parse_kf8(MOBIData *m); + + MOBI_EXPORT MOBI_RET 
mobi_parse_huffdic(const MOBIData *m, MOBIHuffCdic *cdic); + MOBI_EXPORT MOBI_RET mobi_parse_fdst(const MOBIData *m, MOBIRawml *rawml); + MOBI_EXPORT MOBI_RET mobi_parse_index(const MOBIData *m, MOBIIndx *indx, const size_t indx_record_number); + MOBI_EXPORT MOBI_RET mobi_parse_rawml(MOBIRawml *rawml, const MOBIData *m); + MOBI_EXPORT MOBI_RET mobi_get_rawml(const MOBIData *m, char *text, size_t *len); + MOBI_EXPORT MOBI_RET mobi_dump_rawml(const MOBIData *m, FILE *file); + MOBI_EXPORT MOBI_RET mobi_decode_font_resource(unsigned char **decoded_font, size_t *decoded_size, MOBIPart *part); + MOBI_EXPORT MOBI_RET mobi_decode_audio_resource(unsigned char **decoded_resource, size_t *decoded_size, MOBIPart *part); + MOBI_EXPORT MOBI_RET mobi_decode_video_resource(unsigned char **decoded_resource, size_t *decoded_size, MOBIPart *part); + + MOBI_EXPORT MOBIPdbRecord * mobi_get_record_by_uid(const MOBIData *m, const size_t uid); + MOBI_EXPORT MOBIPdbRecord * mobi_get_record_by_seqnumber(const MOBIData *m, const size_t uid); + MOBI_EXPORT MOBI_RET mobi_get_fullname(const MOBIData *m, char *fullname, const size_t len); + MOBI_EXPORT size_t mobi_get_text_maxsize(const MOBIData *m); + MOBI_EXPORT size_t mobi_get_kf8offset(const MOBIData *m); + MOBI_EXPORT size_t mobi_get_kf8boundary_seqnumber(const MOBIData *m); + MOBI_EXPORT size_t mobi_get_record_extrasize(const MOBIPdbRecord *record, const uint16_t flags); + MOBI_EXPORT size_t mobi_get_fileversion(const MOBIData *m); + MOBI_EXPORT size_t mobi_get_fdst_record_number(const MOBIData *m); + MOBI_EXPORT MOBIExthMeta mobi_get_exthtagmeta_by_tag(const MOBIExthTag tag); + MOBI_EXPORT MOBIFileMeta mobi_get_filemeta_by_type(const MOBIFiletype type); + MOBI_EXPORT uint32_t mobi_decode_exthvalue(const unsigned char *data, const size_t size); + MOBI_EXPORT char * mobi_decode_exthstring(const MOBIData *m, const unsigned char *data, const size_t size); + MOBI_EXPORT struct tm * mobi_pdbtime_to_time(const long pdb_time); + MOBI_EXPORT const 
char * mobi_get_locale_string(const uint32_t locale); + MOBI_EXPORT size_t mobi_get_locale_number(const char *locale_string); + + MOBI_EXPORT bool mobi_exists_mobiheader(const MOBIData *m); + MOBI_EXPORT bool mobi_exists_fdst(const MOBIData *m); + MOBI_EXPORT bool mobi_exists_skel_indx(const MOBIData *m); + MOBI_EXPORT bool mobi_exists_frag_indx(const MOBIData *m); + MOBI_EXPORT bool mobi_exists_guide_indx(const MOBIData *m); + MOBI_EXPORT bool mobi_exists_ncx(const MOBIData *m); + MOBI_EXPORT bool mobi_exists_orth(const MOBIData *m); + MOBI_EXPORT bool mobi_is_hybrid(const MOBIData *m); + MOBI_EXPORT bool mobi_is_encrypted(const MOBIData *m); + MOBI_EXPORT bool mobi_is_mobipocket(const MOBIData *m); + + MOBI_EXPORT MOBIRawml * mobi_init_rawml(const MOBIData *m); + MOBI_EXPORT void mobi_free_rawml(MOBIRawml *rawml); + + /** @} */ // end of mobi_export group + +#ifdef __cplusplus +} +#endif -int mobi_parse_huffdic(MOBIData *m, MOBIHuffCdic *cdic); -MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid); -MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t uid); -int mobi_get_rawml(MOBIData *m, char *text, size_t len); -int mobi_dump_rawml(MOBIData *m, FILE *file); -void mobi_get_fullname(MOBIData *m, char *fullname, size_t len); -int mobi_get_kf8boundary(MOBIData *m); #endif diff --git a/src/opf.c b/src/opf.c new file mode 100644 index 0000000..58f5e89 --- /dev/null +++ b/src/opf.c @@ -0,0 +1,1868 @@ +/** @file opf.c + * @brief Functions for handling OPF structures + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ + +#include +#include +#ifdef __clang__ +#pragma clang diagnostic push +/* suppress clang documentation warning for libxml headers */ +#pragma clang diagnostic ignored "-Wdocumentation" +#endif +#include +#include +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#include "opf.h" +#include "index.h" +#include "util.h" +#include "parse_rawml.h" +#include "debug.h" + +/** + @brief Array of valid OPF guide types + + http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.6 + */ +const char *mobi_guide_types[] = { + "cover", /**< the book cover(s), jacket information, etc. */ + "title-page", /**< page with possibly title, author, publisher, and other metadata */ + "toc", /**< table of contents */ + "index", /**< back-of-book style index */ + "glossary", /**< glossary */ + "acknowledgements", /**< acknowledgements */ + "bibliography", /**< bibliography */ + "colophon", /**< colophon */ + "copyright-page", /**< copyright page */ + "dedication", /**< dedication */ + "epigraph", /**< epigraph */ + "foreword", /**< foreword */ + "loi", /**< list of illustrations */ + "lot", /**< list of tables */ + "notes", /**< notes */ + "preface", /**< preface */ + "text", /**< First "real" page of content (e.g. 
"Chapter 1") */ + NULL /**< eof */ +}; + +/** + @brief Check if type is valid OPF guide element + + Compares type with elements of mobi_guide_types[] array (exact, case-sensitive match). + Types starting with "other." prefix are accepted as well. + + @param[in] type OPF guide type, NULL is treated as invalid + @return True if type is valid guide type, false otherwise + */ +bool mobi_is_guide_type(const char *type) { + if (type == NULL) { return false; } + size_t i = 0; + while (mobi_guide_types[i]) { + /* compare whole strings: a bare prefix like "t" or "" must not pass for "toc" */ + if (strcmp(mobi_guide_types[i++], type) == 0) { + return true; + } + } + /* check if "other" type */ + if (strncmp(type, "other.", 6) == 0) { return true; } + return false; +} + +/** + @brief Reconstruct guide part of the OPF file + + @param[in,out] opf Structure OPF->OPFguide will be filled with parsed data + @param[in] rawml Structure MOBIRawml will be parsed + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_build_opf_guide(OPF *opf, const MOBIRawml *rawml) { + /* parse guide data */ + if (rawml == NULL || rawml->guide == NULL) { + debug_print("%s\n", "Initialization failed"); + return MOBI_INIT_FAILED; + } + size_t i = 0; + MOBI_RET ret; + size_t count = rawml->guide->entries_count; + if (count == 0) { + return MOBI_SUCCESS; + } + opf->guide = malloc(sizeof(OPFguide)); + if (opf->guide == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + OPFreference **reference = malloc((count + 1) * sizeof(OPFreference*)); + if (reference == NULL) { + free(opf->guide); + opf->guide = NULL; + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + while (i < count) { + const MOBIIndexEntry *guide_entry = &rawml->guide->entries[i]; + const char *type = guide_entry->label; + uint32_t cncx_offset; + ret = mobi_get_indxentry_tagvalue(&cncx_offset, guide_entry, INDX_TAG_GUIDE_TITLE_CNCX); + if (ret != MOBI_SUCCESS) { + free(reference); + free(opf->guide); + opf->guide = NULL; + return ret; + } + const MOBIPdbRecord *cncx_record = rawml->guide->cncx_record; + char *ref_title = 
mobi_get_cncx_string(cncx_record, cncx_offset); + uint32_t frag_number = MOBI_NOTSET; + ret = mobi_get_indxentry_tagvalue(&frag_number, guide_entry, INDX_TAG_FRAG_POSITION); + if (ret != MOBI_SUCCESS) { + debug_print("INDX_TAG_FRAG_POSITION not found (%i)\n", ret); + /* skip this entry: the index must advance, a bare continue would retry the same entry forever */ + /* the slot is set to NULL and squeezed out after the loop */ + free(ref_title); + reference[i++] = NULL; + continue; + /* FIXME: I need some examples which use other tags */ + //mobi_get_indxentry_tagvalue(&frag_number, guide_entry, INDX_TAG_FRAG_FILE_NR); + } + const MOBIIndexEntry *frag_entry = &rawml->frag->entries[frag_number]; + uint32_t file_number; + ret = mobi_get_indxentry_tagvalue(&file_number, frag_entry, INDX_TAG_FRAG_FILE_NR); + if (ret != MOBI_SUCCESS) { + free(ref_title); + free(reference); + free(opf->guide); + opf->guide = NULL; + return ret; + } + /* check if valid guide type */ + char *ref_type; + size_t type_size = strlen(type); + if (!mobi_is_guide_type(type)) { + /* prepend "other." prefix */ + type_size += 6; + ref_type = malloc(type_size + 1); + snprintf(ref_type, type_size + 1, "other.%s", type); + } else { + ref_type = malloc(type_size + 1); + strncpy(ref_type, type, type_size); + ref_type[type_size] = '\0'; + } + debug_print("", ref_type, ref_title, file_number); + char href[FILENAME_MAX + 1]; + snprintf(href, FILENAME_MAX, "part%05u.html", file_number); + char *ref_href = strdup(href); + reference[i] = calloc(1, sizeof(OPFreference)); + *reference[i] = (OPFreference) { ref_type, ref_title, ref_href }; + i++; + } + /* squeeze out entries skipped above, then terminate array with NULL */ + size_t kept = 0; + for (size_t j = 0; j < count; j++) { + if (reference[j]) { reference[kept++] = reference[j]; } + } + reference[kept] = NULL; + opf->guide->reference = reference; + return MOBI_SUCCESS; +} + +/** + @brief Write entries for given ncx level + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] ncx Array of NCX structures with ncx content + @param[in] level TOC level + @param[in] from First entry in NCX array to copy from + @param[in] to End of range in NCX array (exclusive): entries from .. to - 1 are considered + @param[in,out] seq Sequential number for playOrder attribute, incremented for every written entry + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_write_ncx_level(xmlTextWriterPtr writer, 
const NCX *ncx, const size_t level, const size_t from, const size_t to, size_t *seq) { + /* 'to' is an exclusive bound: entries from .. to - 1 are visited */ + for (size_t i = from; i < to; i++) { + if (level != ncx[i].level) { + continue; + } + /* start navPoint element */ + char playorder[10 + 1]; + snprintf(playorder, 11, "%u", (uint32_t) (*seq)++); + char id[20 + 5 + 1]; + snprintf(id, 26, "toc-%u-%u", (uint32_t) (level + 1), (uint32_t) (i - from + 1)); + int xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "navPoint"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST id); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "playOrder", BAD_CAST playorder); + if (xml_ret < 0) { return MOBI_XML_ERR; } + /* write navLabel element with nested text element */ + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "navLabel"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "text"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST ncx[i].text); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + /* write content element */ + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "content"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "src", BAD_CAST ncx[i].target); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + debug_print("%s - %s\n", ncx[i].text, ncx[i].target); + if (ncx[i].first_child != MOBI_NOTSET && ncx[i].last_child != MOBI_NOTSET) { + /* last_child is the index of the last child (inclusive), the range end is exclusive, hence + 1 */ + MOBI_RET ret = mobi_write_ncx_level(writer, ncx, level + 1, ncx[i].first_child, (size_t) ncx[i].last_child + 1, seq); + /* propagate writer errors from nested levels instead of dropping them */ + if (ret != MOBI_SUCCESS) { return ret; } + } + /* end navPoint element */ + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + } + return MOBI_SUCCESS; +} + +/** + @brief Write meta element to XML buffer + + 
@param[in,out] writer xmlTextWriterPtr to write to + @param[in] name Attribute name + @param[in] content Attribute content + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_meta(xmlTextWriterPtr writer, const char *name, const char *content) { + int xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "meta"); + if (xml_ret < 0) { + debug_print("XML error: %i (name: %s, content: %s)\n", xml_ret, name, content); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "name", BAD_CAST name); + if (xml_ret < 0) { + debug_print("XML error: %i (name: %s, content: %s)\n", xml_ret, name, content); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "content", BAD_CAST content); + if (xml_ret < 0) { + debug_print("XML error: %i (name: %s, content: %s)\n", xml_ret, name, content); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (name: %s, content: %s)\n", xml_ret, name, content); + return MOBI_XML_ERR; + } + return MOBI_SUCCESS; +} + + +/** + @brief Add reconstructed opf part to rawml + + The xml string is copied into a new MOBIPart which is appended at the end + of rawml->resources list. The part gets uid of the last list element + 1 + (or 0 if the list was empty). On failure the list is left unchanged. + + @param[in] opf_xml OPF xml string, zero terminated + @param[in,out] rawml New data will be added to MOBIRawml rawml->resources structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_opf_add_to_rawml(const char *opf_xml, MOBIRawml *rawml) { + if (opf_xml == NULL || rawml == NULL) { + debug_print("%s\n", "Initialization failed"); + return MOBI_INIT_FAILED; + } + /* build the part completely before linking it, so a failed allocation cannot corrupt the list */ + MOBIPart *opf_part = calloc(1, sizeof(MOBIPart)); + if (opf_part == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + opf_part->data = (unsigned char *) strdup(opf_xml); + if (opf_part->data == NULL) { + free(opf_part); + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + opf_part->size = strlen(opf_xml); + opf_part->type = T_OPF; + opf_part->next = NULL; + if (rawml->resources) { + MOBIPart *part = rawml->resources; + while (part->next) { + part = part->next; + } + opf_part->uid = part->uid + 1; + part->next = opf_part; + } + else { + opf_part->uid = 0; + rawml->resources = opf_part; + } + return MOBI_SUCCESS; +} + +/** + @brief Add reconstructed ncx part to 
rawml + + The xml string is copied into a new MOBIPart which is appended at the end + of rawml->resources list. The part gets uid of the last list element + 1 + (or 0 if the list was empty). On failure the list is left unchanged. + + @param[in] ncx_xml NCX xml string, zero terminated + @param[in,out] rawml New data will be added to MOBIRawml rawml->resources structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_ncx_add_to_rawml(const char *ncx_xml, MOBIRawml *rawml) { + if (ncx_xml == NULL || rawml == NULL) { + debug_print("%s\n", "Initialization failed"); + return MOBI_INIT_FAILED; + } + /* build the part completely before linking it, so a failed allocation cannot corrupt the list */ + MOBIPart *ncx_part = calloc(1, sizeof(MOBIPart)); + if (ncx_part == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + ncx_part->data = (unsigned char *) strdup(ncx_xml); + if (ncx_part->data == NULL) { + free(ncx_part); + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + ncx_part->size = strlen(ncx_xml); + ncx_part->type = T_NCX; + ncx_part->next = NULL; + if (rawml->resources) { + MOBIPart *part = rawml->resources; + while (part->next) { + part = part->next; + } + ncx_part->uid = part->uid + 1; + part->next = ncx_part; + } + else { + ncx_part->uid = 0; + rawml->resources = ncx_part; + } + return MOBI_SUCCESS; +} + +/** + @brief Write ncx header + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] opf OPF structure to fetch some data + @param[in] maxlevel Value of dtb:depth attribute + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_write_ncx_header(xmlTextWriterPtr writer, const OPF *opf, uint32_t maxlevel) { + /* write header */ + char depth[10 + 1]; + snprintf(depth, 11, "%d", maxlevel); + + /* */ + int xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "head"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + /* meta uid */ + MOBI_RET ret = mobi_xml_write_meta(writer, "dtb:uid", opf->metadata->dc_meta->identifier[0]->value); + if (ret != MOBI_SUCCESS) { return ret; } + /* meta depth */ + ret = mobi_xml_write_meta(writer, "dtb:depth", depth); + if (ret != MOBI_SUCCESS) { return ret; } + /* meta pagecount */ + ret = mobi_xml_write_meta(writer, "dtb:totalPageCount", "0"); + if (ret != MOBI_SUCCESS) { return ret; } + /* meta pagenumber */ + ret = mobi_xml_write_meta(writer, "dtb:maxPageNumber", "0"); + if (ret != MOBI_SUCCESS) { return ret; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + // + xml_ret = 
xmlTextWriterStartElement(writer, BAD_CAST "docTitle"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "text"); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST opf->metadata->dc_meta->title[0]); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { return MOBI_XML_ERR; } + return MOBI_SUCCESS; +} + +/** + @brief Build ncx document using libxml2 and append it to rawml + + @param[in,out] rawml MOBIRawml structure + @param[in] ncx Array of NCX structures with ncx content + @param[in] opf OPF structure to fetch some data + @param[in] maxlevel Value of dtb:depth attribute + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_write_ncx(MOBIRawml *rawml, const NCX *ncx, const OPF *opf, uint32_t maxlevel) { + const xmlChar * NCXNamespace = BAD_CAST "http://www.daisy.org/z3986/2005/ncx/"; + xmlBufferPtr buf = xmlBufferCreate(); + if (buf == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + xmlTextWriterPtr writer = xmlNewTextWriterMemory(buf, 0); + if (writer == NULL) { + xmlBufferFree(buf); + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + xmlTextWriterSetIndent(writer, 1); + int xml_ret = xmlTextWriterStartDocument(writer, NULL, NULL, NULL); + if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterStartElementNS(writer, NULL, BAD_CAST "ncx", NCXNamespace); + if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "version", BAD_CAST "2005-1"); + if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "xml:lang", BAD_CAST opf->metadata->dc_meta->language[0]); + if (xml_ret < 0) { goto cleanup; } + + MOBI_RET ret = mobi_write_ncx_header(writer, opf, 
maxlevel); + if (ret != MOBI_SUCCESS) { goto cleanup; } + + /* start */ + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "navMap"); + if (xml_ret < 0) { goto cleanup; } + if (rawml->ncx) { + const size_t count = rawml->ncx->entries_count; + size_t seq = 1; + ret = mobi_write_ncx_level(writer, ncx, 0, 0, count, &seq); + if (ret != MOBI_SUCCESS) { goto cleanup; } + } + + /* end */ + xml_ret = xmlTextWriterEndDocument(writer); + if (xml_ret < 0) { goto cleanup; } + xmlFreeTextWriter(writer); + const char *ncx_xml = (const char *) buf->content; + mobi_ncx_add_to_rawml(ncx_xml, rawml); + xmlBufferFree(buf); + return MOBI_SUCCESS; + +cleanup: + xmlFreeTextWriter(writer); + xmlBufferFree(buf); + debug_print("%s\n", "XML writing failed"); + return MOBI_XML_ERR; +} + + +
/**
 @brief Free array of ncx entries

 Releases the text and target strings of every entry and then the array itself.

 @param[in] ncx Array of NCX structures with ncx content (may be NULL)
 @param[in] count Number of initialized entries in the array
 */
void mobi_free_ncx(NCX *ncx, size_t count) {
    if (ncx == NULL) {
        return;
    }
    for (size_t i = count; i > 0; i--) {
        free(ncx[i - 1].target);
        free(ncx[i - 1].text);
    }
    free(ncx);
}

/**
 @brief Parse ncx index, recreate ncx document and append it to rawml

 For every index entry the label text, link target, nesting level and
 parent/child relations are collected into a temporary NCX array, which is
 then passed to mobi_write_ncx(). When rawml has no ncx index,
 mobi_write_ncx() is called without entries.

 @param[in,out] rawml MOBIRawml structure
 @param[in] opf OPF structure to fetch some data
 @return MOBI_RET status code (on success MOBI_SUCCESS)
 */
MOBI_RET mobi_build_ncx(MOBIRawml *rawml, const OPF *opf) {
    if (rawml == NULL) {
        debug_print("%s\n", "Initialization failed");
        return MOBI_INIT_FAILED;
    }
    if (rawml->ncx == NULL) {
        /* no ncx index, write the document without navigation entries */
        return mobi_write_ncx(rawml, NULL, opf, 1);
    }
    const size_t count = rawml->ncx->entries_count;
    if (count == 0) {
        return MOBI_SUCCESS;
    }
    /* calloc also guards against overflow of count * sizeof(NCX) */
    NCX *ncx = calloc(count, sizeof(*ncx));
    if (ncx == NULL) {
        debug_print("%s\n", "Memory allocation failed");
        return MOBI_MALLOC_FAILED;
    }
    const MOBIPdbRecord *cncx_record = rawml->ncx->cncx_record;
    const size_t target_size = MOBI_ATTRNAME_MAXSIZE + 1;
    MOBI_RET ret = MOBI_SUCCESS;
    uint32_t maxlevel = 0;
    /* strings of the entry being built; ownership moves to ncx[i] on success */
    char *text = NULL;
    char *target = NULL;
    size_t i = 0;
    while (i < count) {
        const MOBIIndexEntry *ncx_entry = &rawml->ncx->entries[i];
        const char *label = ncx_entry->label;
        /* entry label is parsed as a hexadecimal id */
        const size_t id = label ? strtoul(label, NULL, 16) : 0;
        uint32_t cncx_offset;
        ret = mobi_get_indxentry_tagvalue(&cncx_offset, ncx_entry, INDX_TAG_NCX_TEXT_CNCX);
        if (ret != MOBI_SUCCESS) {
            goto cleanup;
        }
        text = mobi_get_cncx_string(cncx_record, cncx_offset);
        target = malloc(target_size);
        if (text == NULL || target == NULL) {
            ret = MOBI_MALLOC_FAILED;
            goto cleanup;
        }
        if (rawml->version >= 8) {
            uint32_t posfid;
            ret = mobi_get_indxentry_tagvalue(&posfid, ncx_entry, INDX_TAG_NCX_POSFID);
            if (ret != MOBI_SUCCESS) {
                goto cleanup;
            }
            uint32_t posoff;
            ret = mobi_get_indxentry_tagvalue(&posoff, ncx_entry, INDX_TAG_NCX_POSOFF);
            if (ret != MOBI_SUCCESS) {
                goto cleanup;
            }
            uint32_t filenumber;
            char targetid[MOBI_ATTRNAME_MAXSIZE + 1];
            ret = mobi_get_id_by_posoff(&filenumber, targetid, rawml, posfid, posoff);
            if (ret != MOBI_SUCCESS) {
                goto cleanup;
            }
            /* FIXME: posoff == 0 means top of file? */
            if (posoff) {
                snprintf(target, target_size, "part%05u.html#%s", filenumber, targetid);
            } else {
                snprintf(target, target_size, "part%05u.html", filenumber);
            }
        } else {
            uint32_t filepos;
            ret = mobi_get_indxentry_tagvalue(&filepos, ncx_entry, INDX_TAG_NCX_FILEPOS);
            if (ret != MOBI_SUCCESS) {
                goto cleanup;
            }
            snprintf(target, target_size, "part00000.html#%010u", filepos);
        }
        uint32_t level;
        ret = mobi_get_indxentry_tagvalue(&level, ncx_entry, INDX_TAG_NCX_LEVEL);
        if (ret != MOBI_SUCCESS) {
            goto cleanup;
        }
        if (level > maxlevel) {
            maxlevel = level;
        }
        /* the relation tags below are optional: only MOBI_INIT_FAILED aborts,
           any other failure leaves the value at MOBI_NOTSET */
        uint32_t parent = MOBI_NOTSET;
        ret = mobi_get_indxentry_tagvalue(&parent, ncx_entry, INDX_TAG_NCX_PARENT);
        if (ret == MOBI_INIT_FAILED) {
            goto cleanup;
        }
        uint32_t first_child = MOBI_NOTSET;
        ret = mobi_get_indxentry_tagvalue(&first_child, ncx_entry, INDX_TAG_NCX_CHILD_START);
        if (ret == MOBI_INIT_FAILED) {
            goto cleanup;
        }
        uint32_t last_child = MOBI_NOTSET;
        ret = mobi_get_indxentry_tagvalue(&last_child, ncx_entry, INDX_TAG_NCX_CHILD_END);
        if (ret == MOBI_INIT_FAILED) {
            goto cleanup;
        }
        debug_print("seq=%zu, id=%zu, text='%s', target='%s', level=%u, parent=%u, fchild=%u, lchild=%u\n", i, id, text, target, level, parent, first_child, last_child);
        ncx[i++] = (NCX) {id, text, target, level, parent, first_child, last_child};
        text = NULL;
        target = NULL;
    }
    /* report a failed write instead of silently returning success */
    ret = mobi_write_ncx(rawml, ncx, opf, maxlevel);
    mobi_free_ncx(ncx, count);
    return ret;

cleanup:
    /* entries 0..i-1 are complete, the current one is held in text/target */
    free(text);
    free(target);
    mobi_free_ncx(ncx, i);
    return ret;
}

/**
 @brief Copy text data from EXTH record to array of strings

 It will allocate memory for the array if not already allocated.
+ It will find first array index that is not already used + + @param[in] m MOBIData structure + @param[in] exth MOBIExthHeader record + @param[in,out] array Array into which text string will be inserted + */ +static void mobi_opf_fill_tag(const MOBIData *m, const MOBIExthHeader *exth, char ***array) { + if (*array == NULL) { + *array = calloc(OPF_META_MAX_TAGS, sizeof(**array)); + if (*array == NULL) { + return; + } + } + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + /* find first free slot */ + if((*array)[i] != NULL) { i++; continue; } + MOBIExthMeta exth_tag = mobi_get_exthtagmeta_by_tag(exth->tag); + char *value = NULL; + if (exth_tag.type == EXTH_NUMERIC) { + value = malloc(10 + 1); + if (value) { + const uint32_t val32 = mobi_decode_exthvalue(exth->data, exth->size); + snprintf(value, 10, "%d", val32); + } + } else if (exth_tag.type == EXTH_STRING) { + value = mobi_decode_exthstring(m, exth->data, exth->size); + } + if (value) { + (*array)[i] = value; + } + return; + } + /* not enough tags */ + debug_print("OPF_META_MAX_TAGS = %i reached\n", OPF_META_MAX_TAGS); +} + +/** + @brief Set values for attributes of OPF tag + + It will allocate memory for the OPFmeta members: name and content. 
+ It will find first array index that is not already used + + @param[in,out] meta Array of OPFmeta structures to be filled with data + @param[in] name Value of the name attribute + @param[in] content Value of the content attribute + */ +static void mobi_opf_set_meta(OPFmeta **meta, const char *name, const char *content) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + /* find first free slot */ + if(meta[i] != NULL) { i++; continue; } + meta[i] = malloc(sizeof(OPFmeta)); + if (meta[i] == NULL) { + return; + } + meta[i]->name = strdup(name); + meta[i]->content = strdup(content); + if (meta[i]->name == NULL || meta[i]->content == NULL) { + free(meta[i]); + meta[i] = NULL; + } + return; + } + /* not enough tags */ + debug_print("OPF_META_MAX_TAGS = %i reached\n", OPF_META_MAX_TAGS); +} + +/** + @brief Set values for attributes of OPF tag + + It will allocate memory for the OPFmeta members: name and content. + Content attribute will be copied from EXTH record. + It will find first array index that is not already used + + @param[in] m MOBIData structure + @param[in] exth MOBIExthHeader structure containing EXTH records + @param[in,out] meta Array of OPFmeta structures to be filled with data + @param[in] name Value of the name attribute + */ +static void mobi_opf_copy_meta(const MOBIData *m, const MOBIExthHeader *exth, OPFmeta **meta, const char *name) { + MOBIExthMeta exth_tag = mobi_get_exthtagmeta_by_tag(exth->tag); + char *content = NULL; + if (exth_tag.tag == EXTH_COVEROFFSET) { + content = malloc(13 + 1); + if (content) { + const uint32_t val32 = mobi_decode_exthvalue(exth->data, exth->size); + snprintf(content, 14, "resource%05d", val32); + } + } else if (exth_tag.type == EXTH_NUMERIC) { + content = malloc(10 + 1); + if (content) { + const uint32_t val32 = mobi_decode_exthvalue(exth->data, exth->size); + snprintf(content, 11, "%d", val32); + } + } else if (exth_tag.type == EXTH_STRING) { + char *string = mobi_decode_exthstring(m, exth->data, exth->size); + 
content = string; + } + if (content) { + mobi_opf_set_meta(meta, name, content); + free(content); + } +} + +/** + @brief Set values for attributes of OPF manifest tag + + It will allocate memory for the OPFitem members: id, href and media-type. + It will find first array index that is not already used + + @param[in,out] meta Array of OPFmeta structures to be filled with data + @param[in] name Value of the name attribute + @param[in] content Value of the content attribute + */ +void mobi_opf_set_item(OPFmeta **meta, const char *name, const char *content) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + /* find first free slot */ + if(meta[i] != NULL) { i++; continue; } + meta[i] = malloc(sizeof(OPFmeta)); + if (meta[i] == NULL) { + return; + } + meta[i]->name = strdup(name); + meta[i]->content = strdup(content); + if (meta[i]->name == NULL || meta[i]->content == NULL) { + free(meta[i]); + meta[i] = NULL; + } + return; + } + /* not enough tags */ + debug_print("OPF_META_MAX_TAGS = %i reached\n", OPF_META_MAX_TAGS); +} + +/** + @brief Copy text data from EXTH record to "member_name" member of a structure with given type + + Data will copied from curr->data. + It will allocate memory for the array of structures if not already allocated. 
+ It will find first array index that is not already used + + @param[in] mobidata Mobidata structure + @param[in] struct_type Structure type defined with typedef + @param[in] struct_element Member member_name of this structure will be set to EXTH data + @param[in] member_name Structure member name that will be modified + */ +#define mobi_opf_copy_tagtype(mobidata, struct_type, struct_element, member_name) { \ + if (struct_element == NULL) { \ + struct_element = calloc(OPF_META_MAX_TAGS, sizeof(*struct_element)); \ + if(struct_element == NULL) { return MOBI_MALLOC_FAILED; } \ + } \ + struct_type **element = struct_element; \ + size_t i = 0; \ + while (i < OPF_META_MAX_TAGS) { \ + /* find first free slot */ \ + if(element[i] != NULL) { \ + if(element[i]->member_name != NULL) { i++; continue; } \ + } else { \ + element[i] = calloc(1, sizeof(*element[i])); \ + if(element[i] == NULL) { return MOBI_MALLOC_FAILED; } \ + } \ + MOBIExthMeta exth_tag = mobi_get_exthtagmeta_by_tag(curr->tag); \ + char *value = NULL; \ + if (exth_tag.type == EXTH_NUMERIC) { \ + value = malloc(10 + 1); \ + if (value) { \ + const uint32_t val32 = mobi_decode_exthvalue(curr->data, curr->size); \ + snprintf(value, 10, "%d", val32); \ + } \ + } else if (exth_tag.type == EXTH_STRING) { \ + value = mobi_decode_exthstring(mobidata, curr->data, curr->size); \ + } \ + if(value == NULL) { \ + free(element[i]); \ + element[i] = NULL; \ + return MOBI_MALLOC_FAILED; \ + } \ + element[i]->member_name = value; \ + break; \ + } \ + if (i == OPF_META_MAX_TAGS) { \ + /* not enough tags */ \ + debug_print("OPF_META_MAX_TAGS = %i reached\n", OPF_META_MAX_TAGS); \ + } \ +} + +/** + @brief Set "member_name" member of a structure with given type to string value + + It will allocate memory for the array of structures if not already allocated. 
+ It will find first array index that is not already used + + @param[in] struct_type Structure type defined with typedef + @param[in] struct_element Member member_name of this structure will be set to EXTH data + @param[in] member_name Structure member name that will be modified + @param[in] string String value that will be assigned to the structure memeber + */ +#define mobi_opf_set_tagtype(struct_type, struct_element, member_name, string) { \ + if (struct_element == NULL) { \ + struct_element = calloc(OPF_META_MAX_TAGS, sizeof(*struct_element)); \ + if(struct_element == NULL) { return MOBI_MALLOC_FAILED; } \ + } \ + struct_type **element = struct_element; \ + size_t i = 0; \ + while (i < OPF_META_MAX_TAGS) { \ + /* find first free slot */ \ + if(element[i] != NULL) { \ + if(element[i]->member_name != NULL) { i++; continue; } \ + } else { \ + element[i] = calloc(1, sizeof(*element[i])); \ + if(element[i] == NULL) { return MOBI_MALLOC_FAILED; } \ + } \ + element[i]->member_name = strdup(string); \ + if(element[i]->member_name == NULL) { \ + free(element[i]); \ + element[i] = NULL; \ + return MOBI_MALLOC_FAILED; \ + } \ + break; \ + } \ + if (i == OPF_META_MAX_TAGS) { \ + /* not enough tags */ \ + debug_print("OPF_META_MAX_TAGS = %i reached\n", OPF_META_MAX_TAGS); \ + } \ +} + +/** + @brief Copy text data from EXTH record to OPFmetadata tags structure + + @param[in,out] metadata Structure OPFmetadata will be filled with parsed data + @param[in] m MOBIData structure with loaded data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_opf_from_exth(OPFmetadata *metadata, const MOBIData *m) { + if (m == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; + } + if (m->eh == NULL) { + return MOBI_INIT_FAILED; + } + MOBIExthHeader *curr = m->eh; + /* iterate through EXTH records */ + while (curr != NULL) { + switch (curr->tag) { + /* */ + case EXTH_DESCRIPTION: + mobi_opf_fill_tag(m, curr, 
&metadata->dc_meta->description); + break; + case EXTH_LANGUAGE: + mobi_opf_fill_tag(m, curr, &metadata->dc_meta->language); + break; + case EXTH_PUBLISHER: + mobi_opf_fill_tag(m, curr, &metadata->dc_meta->publisher); + break; + case EXTH_RIGHTS: + mobi_opf_fill_tag(m, curr, &metadata->dc_meta->rights); + break; + case EXTH_SOURCE: + mobi_opf_fill_tag(m, curr, &metadata->dc_meta->source); + break; + case EXTH_TITLE: + case EXTH_UPDATEDTITLE: + mobi_opf_fill_tag(m, curr, &metadata->dc_meta->title); + break; + case EXTH_TYPE: + mobi_opf_fill_tag(m, curr, &metadata->dc_meta->type); + break; + case EXTH_AUTHOR: + mobi_opf_copy_tagtype(m, OPFcreator, metadata->dc_meta->creator, value); + break; + case EXTH_CONTRIBUTOR: + mobi_opf_copy_tagtype(m, OPFcreator, metadata->dc_meta->contributor, value); + break; + case EXTH_SUBJECT: + mobi_opf_copy_tagtype(m, OPFsubject, metadata->dc_meta->subject, value); + break; + case EXTH_SUBJECTCODE: + mobi_opf_copy_tagtype(m, OPFsubject, metadata->dc_meta->subject, basic_code); + break; + case EXTH_ISBN: + mobi_opf_copy_tagtype(m, OPFidentifier, metadata->dc_meta->identifier, value); + mobi_opf_set_tagtype(OPFidentifier, metadata->dc_meta->identifier, scheme, "ISBN"); + break; + case EXTH_PUBLISHINGDATE: + mobi_opf_copy_tagtype(m, OPFdate, metadata->dc_meta->date, value); + mobi_opf_set_tagtype(OPFdate, metadata->dc_meta->date, event, "publication"); + break; + /* */ + case EXTH_ADULT: + mobi_opf_fill_tag(m, curr, &metadata->x_meta->adult); + break; + case EXTH_DICTNAME: + mobi_opf_fill_tag(m, curr, &metadata->x_meta->dict_short_name); + break; + case EXTH_IMPRINT: + mobi_opf_fill_tag(m, curr, &metadata->x_meta->imprint); + break; + case EXTH_REVIEW: + mobi_opf_fill_tag(m, curr, &metadata->x_meta->review); + break; + case EXTH_PRICE: + mobi_opf_copy_tagtype(m, OPFsrp, metadata->x_meta->srp, value); + break; + case EXTH_CURRENCY: + mobi_opf_copy_tagtype(m, OPFsrp, metadata->x_meta->srp, currency); + break; + /* */ + case 
EXTH_FIXEDLAYOUT: + mobi_opf_copy_meta(m, curr, metadata->meta, "fixed-layout"); + break; + case EXTH_BOOKTYPE: + mobi_opf_copy_meta(m, curr, metadata->meta, "book-type"); + break; + case EXTH_ORIENTATIONLOCK: + mobi_opf_copy_meta(m, curr, metadata->meta, "orientation-lock"); + break; + case EXTH_ORIGRESOLUTION: + mobi_opf_copy_meta(m, curr, metadata->meta, "original-resolution"); + break; + case EXTH_ZEROGUTTER: + mobi_opf_copy_meta(m, curr, metadata->meta, "zero-gutter"); + break; + case EXTH_ZEROMARGIN: + mobi_opf_copy_meta(m, curr, metadata->meta, "zero-margin"); + break; + case EXTH_REGIONMAGNI: + mobi_opf_copy_meta(m, curr, metadata->meta, "region-mag"); + break; + case EXTH_ALIGNMENT: + mobi_opf_copy_meta(m, curr, metadata->meta, "primary-writing-mode"); + break; + case EXTH_OVERRIDEFONTS: + mobi_opf_copy_meta(m, curr, metadata->meta, "override-kindle-fonts"); + break; + case EXTH_COVEROFFSET: + mobi_opf_copy_meta(m, curr, metadata->meta, "cover"); + break; + default: + break; + } + curr = curr->next; + } + return MOBI_SUCCESS; +} + +/** + @brief Recreate OPF structure + + @param[in,out] opf Structure OPF->OPFmetadata will be filled with parsed data + @param[in] m MOBIData structure containing document metadata + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_build_opf_metadata(OPF *opf, const MOBIData *m) { + if (m == NULL) { + debug_print("%s\n", "Initialization failed"); + return MOBI_INIT_FAILED; + } + opf->metadata = calloc(1, sizeof(OPFmetadata)); + if (opf->metadata == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + /* initialize metadata sub-elements */ + opf->metadata->meta = calloc(OPF_META_MAX_TAGS, sizeof(OPFmeta*)); + if (opf->metadata->meta == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + opf->metadata->dc_meta = calloc(1, sizeof(OPFdcmeta)); + if (opf->metadata->dc_meta == NULL) { + debug_print("%s\n", "Memory allocation 
failed"); + return MOBI_MALLOC_FAILED; + } + opf->metadata->x_meta = calloc(1, sizeof(OPFxmeta)); + if (opf->metadata->x_meta == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + if (m->eh) { + MOBI_RET ret = mobi_get_opf_from_exth(opf->metadata, m); + if (ret != MOBI_SUCCESS) { + return ret; + } + } + /* check for required elements */ + if (opf->metadata->dc_meta->identifier == NULL) { + /* default id will be "0" */ + char uid_string[11] = "0"; + if (m->mh && m->mh->uid) { + snprintf(uid_string, 11, "%u", *m->mh->uid); + } + mobi_opf_set_tagtype(OPFidentifier, opf->metadata->dc_meta->identifier, value, uid_string); + mobi_opf_set_tagtype(OPFidentifier, opf->metadata->dc_meta->identifier, id, "uid"); + } else { + opf->metadata->dc_meta->identifier[0]->id = strdup("uid"); + } + if (opf->metadata->dc_meta->title == NULL) { + opf->metadata->dc_meta->title = calloc(OPF_META_MAX_TAGS, sizeof(char*)); + if (opf->metadata->dc_meta->title == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + if (m->mh && m->mh->full_name_offset && m->mh->full_name_length) { + size_t len = *m->mh->full_name_length; + char full_name[len + 1]; + mobi_get_fullname(m, full_name, len); + opf->metadata->dc_meta->title[0] = strdup(full_name); + } else if (m->ph && strlen(m->ph->name) > 0) { + opf->metadata->dc_meta->title[0] = strdup(m->ph->name); + } else { + opf->metadata->dc_meta->title[0] = strdup("Unknown"); + } + } + if (opf->metadata->dc_meta->language == NULL) { + opf->metadata->dc_meta->language = calloc(OPF_META_MAX_TAGS, sizeof(char*)); + if (opf->metadata->dc_meta->language == NULL) { + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + if (m->mh && m->mh->locale) { + uint32_t lang_code = *m->mh->locale; + opf->metadata->dc_meta->language[0] = strdup(mobi_get_locale_string(lang_code)); + } else { + opf->metadata->dc_meta->language[0] = strdup("en"); + } + } + return 
MOBI_SUCCESS; +} + +/** + @brief Write array of xml elements of given name to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] name XML element name + @param[in] content Array of XML element contents + @param[in] ns XML namespace string or NULL if empty + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_element_ns(xmlTextWriterPtr writer, const char *name, const char **content, const char *ns) { + if (content) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (content[i] == NULL) { + break; + } + xmlChar *namespace = NULL; + if (ns) { + namespace = BAD_CAST ns; + } + int xml_ret = xmlTextWriterWriteElementNS(writer, namespace, BAD_CAST name, NULL, BAD_CAST content[i]); + if (xml_ret < 0) { + debug_print("XML error: %i (name: %s, content: %s)\n", xml_ret, name, content[i]); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of Dublin Core elements of given name to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] name XML element name + @param[in] content Array of XML element contents + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_dcmeta(xmlTextWriterPtr writer, const char *name, const char **content) { + return mobi_xml_write_element_ns(writer, name, content, "dc"); +} + +/** + @brief Write array of custom MOBI elements of given name to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. 
+ + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] name XML element name + @param[in] content Array of XML element contents + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_xmeta(xmlTextWriterPtr writer, const char *name, const char **content) { + return mobi_xml_write_element_ns(writer, name, content, NULL); +} + +/** + @brief Write array of elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElement() function. + Writes xml element for each not-null entry in the input array. + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] meta Array of OPFmeta structures + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_opfmeta(xmlTextWriterPtr writer, const OPFmeta **meta) { + if (meta) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (meta[i] == NULL) { + break; + } + MOBI_RET ret = mobi_xml_write_meta(writer, meta[i]->name, meta[i]->content); + if (ret != MOBI_SUCCESS) { + return ret; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElement() function. + Writes xml element for each not-null entry in the input array. 
+ + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] reference Array of OPFreference structures + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_reference(xmlTextWriterPtr writer, const OPFreference **reference) { + if (reference) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (reference[i] == NULL) { + break; + } + int xml_ret; + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "reference"); + if (xml_ret < 0) { + debug_print("XML error: %i (reference type: %s)\n", xml_ret, reference[i]->type); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST reference[i]->type); + if (xml_ret < 0) { + debug_print("XML error: %i (reference type: %s)\n", xml_ret, reference[i]->type); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "title", BAD_CAST reference[i]->title); + if (xml_ret < 0) { + debug_print("XML error: %i (reference type: %s)\n", xml_ret, reference[i]->type); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "href", BAD_CAST reference[i]->href); + if (xml_ret < 0) { + debug_print("XML error: %i (reference type: %s)\n", xml_ret, reference[i]->type); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (reference type: %s)\n", xml_ret, reference[i]->type); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write single element to XML buffer + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] id Attribute "id" + @param[in] href Attribute "href" + @param[in] media_type Attribute "media-type" + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_item(xmlTextWriterPtr writer, const char *id, const char *href, const char *media_type) { + int xml_ret; + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "item"); + if (xml_ret < 0) { + 
debug_print("XML error: %i (item id: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST id); + if (xml_ret < 0) { + debug_print("XML error: %i (item id: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "href", BAD_CAST href); + if (xml_ret < 0) { + debug_print("XML error: %i (item id: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "media-type", BAD_CAST media_type); + if (xml_ret < 0) { + debug_print("XML error: %i (item id: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (item id: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + return MOBI_SUCCESS; +} + +/** + @brief Write opf part to XML buffer + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] rawml MOBIRawml structure containing parts metadata + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_spine(xmlTextWriterPtr writer, const MOBIRawml *rawml) { + if (!rawml || !rawml->resources || !rawml->markup || !writer) { + return MOBI_INIT_FAILED; + } + /* get toc id */ + char ncxid[13 + 1]; + MOBIPart *curr = rawml->resources; + while (curr != NULL && curr->type != T_NCX) { + curr = curr->next; + } + if (curr) { + sprintf(ncxid, "resource%05zu", curr->uid); + } else { + return MOBI_DATA_CORRUPT; + } + int xml_ret; + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "spine"); + if (xml_ret < 0) { + debug_print("XML error: %i (spine)\n", xml_ret); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "toc", BAD_CAST ncxid); + if (xml_ret < 0) { + debug_print("XML error: %i (spine toc: %s)\n", xml_ret, ncxid); + return MOBI_XML_ERR; + } + char id[9 + 1]; + curr = rawml->markup; + while (curr != NULL) { + sprintf(id, "part%05zu", curr->uid); + xml_ret = 
xmlTextWriterStartElement(writer, BAD_CAST "itemref"); + if (xml_ret < 0) { + debug_print("XML error: %i (itemref)\n", xml_ret); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "idref", BAD_CAST id); + if (xml_ret < 0) { + debug_print("XML error: %i (idref: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (idref: %s)\n", xml_ret, id); + return MOBI_XML_ERR; + } + curr = curr->next; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (spine)\n", xml_ret); + return MOBI_XML_ERR; + } + return MOBI_SUCCESS; +} + +/** + @brief Write all manifest elements to XML buffer + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] rawml MOBIRawml structure containing parts metadata + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_manifest(xmlTextWriterPtr writer, const MOBIRawml *rawml) { + char href[256]; + char id[256]; + if (rawml->flow != NULL) { + MOBIPart *curr = rawml->flow; + /* skip first raw html part */ + curr = curr->next; + while (curr != NULL) { + MOBIFileMeta file_meta = mobi_get_filemeta_by_type(curr->type); + sprintf(href, "flow%05zu.%s", curr->uid, file_meta.extension); + sprintf(id, "flow%05zu", curr->uid); + MOBI_RET ret = mobi_xml_write_item(writer, id, href, file_meta.mime_type); + if (ret != MOBI_SUCCESS) { + return ret; + } + curr = curr->next; + } + } + if (rawml->markup != NULL) { + MOBIPart *curr = rawml->markup; + while (curr != NULL) { + MOBIFileMeta file_meta = mobi_get_filemeta_by_type(curr->type); + sprintf(href, "part%05zu.%s", curr->uid, file_meta.extension); + sprintf(id, "part%05zu", curr->uid); + MOBI_RET ret = mobi_xml_write_item(writer, id, href, file_meta.mime_type); + if (ret != MOBI_SUCCESS) { + return ret; + } + curr = curr->next; + } + } + if (rawml->resources != NULL) { + MOBIPart *curr = rawml->resources; + 
while (curr != NULL) { + MOBIFileMeta file_meta = mobi_get_filemeta_by_type(curr->type); + sprintf(href, "resource%05zu.%s", curr->uid, file_meta.extension); + sprintf(id, "resource%05zu", curr->uid); + MOBI_RET ret = mobi_xml_write_item(writer, id, href, file_meta.mime_type); + if (ret != MOBI_SUCCESS) { + return ret; + } + curr = curr->next; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of Dublin Core identifier elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] identifier OPFidentifier structure representing identifier element + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_dcmeta_identifier(xmlTextWriterPtr writer, const OPFidentifier **identifier) { + if (identifier) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (identifier[i] == NULL || identifier[i]->value == NULL) { + break; + } + int xml_ret; + xml_ret = xmlTextWriterStartElementNS(writer, BAD_CAST "dc", BAD_CAST "identifier", NULL); + if (xml_ret < 0) { + debug_print("XML error: %i (identifier value: %s)\n", xml_ret, identifier[i]->value); + return MOBI_XML_ERR; + } + if (identifier[i]->id) { + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST identifier[i]->id); + if (xml_ret < 0) { + debug_print("XML error: %i (identifier id: %s)\n", xml_ret, identifier[i]->id); + return MOBI_XML_ERR; + } + } + if (identifier[i]->scheme) { + xml_ret = xmlTextWriterWriteAttributeNS(writer, BAD_CAST "opf", BAD_CAST "scheme", NULL, BAD_CAST identifier[i]->scheme); + if (xml_ret < 0) { + debug_print("XML error: %i (identifier value: %s)\n", xml_ret, identifier[i]->value); + return MOBI_XML_ERR; + } + } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST identifier[i]->value); + if (xml_ret < 0) { + debug_print("XML error: %i (identifier value: %s)\n", xml_ret, 
identifier[i]->value); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (identifier value: %s)\n", xml_ret, identifier[i]->value); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of Dublin Core creator/contributor elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] creator OPFcreator structure representing creator/contributor element + @param[in] name OPF creator value + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_dcmeta_creator(xmlTextWriterPtr writer, const OPFcreator **creator, const char *name) { + if (creator) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (creator[i] == NULL || creator[i]->value == NULL) { + break; + } + int xml_ret; + xml_ret = xmlTextWriterStartElementNS(writer, BAD_CAST "dc", BAD_CAST name, NULL); + if (xml_ret < 0) { + debug_print("XML error: %i (creator value: %s)\n", xml_ret, creator[i]->value); + return MOBI_XML_ERR; + } + if (creator[i]->role) { + xml_ret = xmlTextWriterWriteAttributeNS(writer, BAD_CAST "opf", BAD_CAST "role", NULL, BAD_CAST creator[i]->role); + if (xml_ret < 0) { + debug_print("XML error: %i (creator role: %s)\n", xml_ret, creator[i]->role); + return MOBI_XML_ERR; + } + } + if (creator[i]->file_as) { + xml_ret = xmlTextWriterWriteAttributeNS(writer, BAD_CAST "opf", BAD_CAST "file-as", NULL, BAD_CAST creator[i]->file_as); + if (xml_ret < 0) { + debug_print("XML error: %i (creator file-as: %s)\n", xml_ret, creator[i]->file_as); + return MOBI_XML_ERR; + } + } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST creator[i]->value); + if (xml_ret < 0) { + debug_print("XML error: %i (creator value: %s)\n", xml_ret, creator[i]->value); + return MOBI_XML_ERR; + } + xml_ret = 
xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (creator value: %s)\n", xml_ret, creator[i]->value); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of Dublin Core subject elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. + + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] subject OPFsubject structure representing subject element + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_dcmeta_subject(xmlTextWriterPtr writer, const OPFsubject **subject) { + if (subject) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (subject[i] == NULL || subject[i]->value == NULL) { + break; + } + int xml_ret; + xml_ret = xmlTextWriterStartElementNS(writer, BAD_CAST "dc", BAD_CAST "subject", NULL); + if (xml_ret < 0) { + debug_print("XML error: %i (subject value: %s)\n", xml_ret, subject[i]->value); + return MOBI_XML_ERR; + } + if (subject[i]->basic_code) { + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "BASICCode", BAD_CAST subject[i]->basic_code); + if (xml_ret < 0) { + debug_print("XML error: %i (subject BASICCode: %s)\n", xml_ret, subject[i]->basic_code); + return MOBI_XML_ERR; + } + } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST subject[i]->value); + if (xml_ret < 0) { + debug_print("XML error: %i (subject value: %s)\n", xml_ret, subject[i]->value); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (subject value: %s)\n", xml_ret, subject[i]->value); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of Dublin Core date elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. 
+ + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] date OPFdate structure representing date element + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_dcmeta_date(xmlTextWriterPtr writer, const OPFdate **date) { + if (date) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (date[i] == NULL || date[i]->value == NULL) { + break; + } + int xml_ret; + xml_ret = xmlTextWriterStartElementNS(writer, BAD_CAST "dc", BAD_CAST "date", NULL); + if (xml_ret < 0) { + debug_print("XML error: %i (date value: %s)\n", xml_ret, date[i]->value); + return MOBI_XML_ERR; + } + if (date[i]->event) { + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "event", BAD_CAST date[i]->event); + if (xml_ret < 0) { + debug_print("XML error: %i (date event: %s)\n", xml_ret, date[i]->event); + return MOBI_XML_ERR; + } + } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST date[i]->value); + if (xml_ret < 0) { + debug_print("XML error: %i (date value: %s)\n", xml_ret, date[i]->value); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (date value: %s)\n", xml_ret, date[i]->value); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Write array of custom srp elements to XML buffer + + Wrapper for libxml2 xmlTextWriterWriteElementNS() function. + Writes xml element for each not-null entry in the input array. 
+ + @param[in,out] writer xmlTextWriterPtr to write to + @param[in] srp OPFsrp structure representing srp element + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_xml_write_xmeta_srp(xmlTextWriterPtr writer, const OPFsrp **srp) { + if (srp) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (srp[i] == NULL || srp[i]->value == NULL) { + break; + } + int xml_ret; + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "srp"); + if (xml_ret < 0) { + debug_print("XML error: %i (srp value: %s)\n", xml_ret, srp[i]->value); + return MOBI_XML_ERR; + } + if (srp[i]->currency) { + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "currency", BAD_CAST srp[i]->currency); + if (xml_ret < 0) { + debug_print("XML error: %i (srp currency: %s)\n", xml_ret, srp[i]->currency); + return MOBI_XML_ERR; + } + } + xml_ret = xmlTextWriterWriteString(writer, BAD_CAST srp[i]->value); + if (xml_ret < 0) { + debug_print("XML error: %i (srp value: %s)\n", xml_ret, srp[i]->value); + return MOBI_XML_ERR; + } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { + debug_print("XML error: %i (srp value: %s)\n", xml_ret, srp[i]->value); + return MOBI_XML_ERR; + } + i++; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Free array of OPF structure members + + @param[in] array Array + */ +void mobi_free_opf_array(char **array) { + if (array) { + size_t i = 0; + while (i < OPF_META_MAX_TAGS) { + if (array[i] == NULL) { + break; + } + free(array[i]); + i++; + } + free(array); + } +} + +/** + @brief Macro to free generic OPF structure with two members + + @param[in] struct_array Structure name + @param[in] struct_member1 Structure member 1 + @param[in] struct_member2 Structure member 2 + */ +#define mobi_free_opf_struct_2el(struct_array, struct_member1, struct_member2) { \ + if (struct_array) { \ + size_t i = 0; \ + while (i < OPF_META_MAX_TAGS) { \ + if (struct_array[i] == NULL) { \ + break; \ + } \ + free(struct_array[i]->struct_member1); \ +
free(struct_array[i]->struct_member2); \ + free(struct_array[i]); \ + i++; \ + } \ + free(struct_array); \ + } \ +} + +/** + @brief Macro to free generic OPF structure with three members + + @param[in] struct_array Structure name + @param[in] struct_member1 Structure member 1 + @param[in] struct_member2 Structure member 2 + @param[in] struct_member3 Structure member 3 + */ +#define mobi_free_opf_struct_3el(struct_array, struct_member1, struct_member2, struct_member3) { \ + if (struct_array) { \ + size_t i = 0; \ + while (i < OPF_META_MAX_TAGS) { \ + if (struct_array[i] == NULL) { \ + break; \ + } \ + free(struct_array[i]->struct_member1); \ + free(struct_array[i]->struct_member2); \ + free(struct_array[i]->struct_member3); \ + free(struct_array[i]); \ + i++; \ + } \ + free(struct_array); \ + } \ +} + +/** + @brief Free OPF metadata structure and data + + @param[in] metadata OPF opf->metadata structure + */ +void mobi_free_opf_metadata(OPFmetadata *metadata) { + if (metadata) { + /* */ + mobi_free_opf_struct_2el(metadata->meta, name, content); + /* */ + mobi_free_opf_struct_3el(metadata->dc_meta->contributor, value, file_as, role); + mobi_free_opf_struct_3el(metadata->dc_meta->creator, value, file_as, role); + mobi_free_opf_struct_3el(metadata->dc_meta->identifier, value, id, scheme); + mobi_free_opf_struct_2el(metadata->dc_meta->subject, value, basic_code); + mobi_free_opf_struct_2el(metadata->dc_meta->date, value, event); + mobi_free_opf_array(metadata->dc_meta->description); + mobi_free_opf_array(metadata->dc_meta->language); + mobi_free_opf_array(metadata->dc_meta->publisher); + mobi_free_opf_array(metadata->dc_meta->rights); + mobi_free_opf_array(metadata->dc_meta->source); + mobi_free_opf_array(metadata->dc_meta->title); + mobi_free_opf_array(metadata->dc_meta->type); + free(metadata->dc_meta); + /* */ + mobi_free_opf_struct_2el(metadata->x_meta->srp, value, currency); + mobi_free_opf_array(metadata->x_meta->adult); + 
mobi_free_opf_array(metadata->x_meta->default_lookup_index); + mobi_free_opf_array(metadata->x_meta->dict_short_name); + mobi_free_opf_array(metadata->x_meta->dictionary_in_lang); + mobi_free_opf_array(metadata->x_meta->dictionary_out_lang); + mobi_free_opf_array(metadata->x_meta->embedded_cover); + mobi_free_opf_array(metadata->x_meta->imprint); + mobi_free_opf_array(metadata->x_meta->review); + free(metadata->x_meta); + free(metadata); + } +} + +/** + @brief Free OPFmanifest structure and data + + @param[in] manifest OPF opf->manifest structure + */ +void mobi_free_opf_manifest(OPFmanifest *manifest) { + if (manifest) { + mobi_free_opf_struct_3el(manifest->item, id, href, media_type); + free(manifest); + } +} + +/** + @brief Free OPFspine structure and data + + @param[in] spine OPF opf->spine structure + */ +void mobi_free_opf_spine(OPFspine *spine) { + if (spine) { + mobi_free_opf_array(spine->itemref); + free(spine->toc); + free(spine); + } +} + +/** + @brief Free OPFguide structure and data + + @param[in] guide OPF opf->guide structure + */ +void mobi_free_opf_guide(OPFguide *guide) { + if (guide) { + mobi_free_opf_struct_3el(guide->reference, type, title, href); + free(guide); + } +} + +/** + @brief Free OPF structure and data + + @param[in] opf OPF structure + */ +void mobi_free_opf(OPF *opf) { + mobi_free_opf_metadata(opf->metadata); + mobi_free_opf_manifest(opf->manifest); + mobi_free_opf_spine(opf->spine); + mobi_free_opf_guide(opf->guide); +} + +/** + @brief Recreate OPF structure + + This function will fill OPF structure with parsed index data and convert it to xml file. The file will be stored in MOBIRawml structure. 
+ + @param[in,out] rawml OPF xml file will be appended to rawml->markup linked list + @param[in] m MOBIData structure containing document metadata + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_build_opf(MOBIRawml *rawml, const MOBIData *m) { + /* initialize libXML2 */ + LIBXML_TEST_VERSION + /* initialize OPF structure */ + OPF opf = { + .metadata = NULL, + .manifest = NULL, + .guide = NULL, + .spine = NULL + }; + MOBI_RET ret = mobi_build_opf_metadata(&opf, m); + if (ret != MOBI_SUCCESS) { + mobi_free_opf(&opf); + return ret; + } + mobi_build_ncx(rawml, &opf); + if (rawml->guide) { + ret = mobi_build_opf_guide(&opf, rawml); + if (ret != MOBI_SUCCESS) { + mobi_free_opf(&opf); + return ret; + } + } + + /* build OPF xml document */ + int xml_ret; + const xmlChar * OPFNamespace = BAD_CAST "http://www.idpf.org/2007/opf"; + const xmlChar * DCNamespace = BAD_CAST "http://purl.org/dc/elements/1.1/"; + xmlBufferPtr buf = xmlBufferCreate(); + if (buf == NULL) { + mobi_free_opf(&opf); + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + xmlTextWriterPtr writer = xmlNewTextWriterMemory(buf, 0); + if (writer == NULL) { + xmlBufferFree(buf); + mobi_free_opf(&opf); + debug_print("%s\n", "Memory allocation failed"); + return MOBI_MALLOC_FAILED; + } + xmlTextWriterSetIndent(writer, 1); + xml_ret = xmlTextWriterStartDocument(writer, NULL, NULL, NULL); + if (xml_ret < 0) { goto cleanup; } + /* */ + xml_ret = xmlTextWriterStartElementNS(writer, NULL, BAD_CAST "package", OPFNamespace); + if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "version", BAD_CAST "2.0"); + if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterWriteAttribute(writer, BAD_CAST "unique-identifier", BAD_CAST "uid"); + if (xml_ret < 0) { goto cleanup; } + /* */ + xml_ret = xmlTextWriterStartElementNS(writer, NULL, BAD_CAST "metadata", NULL); + if (xml_ret < 0) { goto cleanup; } + /* */ + //xml_ret = 
xmlTextWriterStartElementNS(writer, NULL, BAD_CAST "dc-metadata", NULL); + //if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterWriteAttributeNS(writer, BAD_CAST "xmlns", BAD_CAST "opf", NULL, OPFNamespace); + if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterWriteAttributeNS(writer, BAD_CAST "xmlns", BAD_CAST "dc", NULL, DCNamespace); + if (xml_ret < 0) { goto cleanup; } + /* Dublin Core elements: each writer below is fed the dc_meta array of the same name */ + OPFdcmeta *dc_meta = opf.metadata->dc_meta; + ret = mobi_xml_write_dcmeta(writer, "title", (const char **) dc_meta->title); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta(writer, "description", (const char **) dc_meta->description); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta(writer, "language", (const char **) dc_meta->language); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta(writer, "publisher", (const char **) dc_meta->publisher); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta(writer, "rights", (const char **) dc_meta->rights); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta(writer, "source", (const char **) dc_meta->source); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta(writer, "type", (const char **) dc_meta->type); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta_identifier(writer, (const OPFidentifier **) dc_meta->identifier); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta_creator(writer, (const OPFcreator **) dc_meta->creator, "creator"); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta_creator(writer, (const OPFcreator **) dc_meta->contributor, "contributor"); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta_subject(writer, (const OPFsubject **) dc_meta->subject); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_dcmeta_date(writer, (const
OPFdate **) dc_meta->date); + if (ret != MOBI_SUCCESS) { goto cleanup; } + //xml_ret = xmlTextWriterEndElement(writer); + //if (xml_ret < 0) { goto cleanup; } + /* */ + //xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "x-metadata"); + //if (xml_ret < 0) { goto cleanup; } + OPFxmeta *x_meta = opf.metadata->x_meta; + /* custom elements */ + ret = mobi_xml_write_xmeta_srp(writer, (const OPFsrp **) x_meta->srp); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "adult", (const char **) x_meta->adult); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "default_lookup_index", (const char **) x_meta->default_lookup_index); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "dict_short_name", (const char **) x_meta->dict_short_name); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "dictionary_in_lang", (const char **) x_meta->dictionary_in_lang); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "dictionary_out_lang", (const char **) x_meta->dictionary_out_lang); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "embedded_cover", (const char **) x_meta->embedded_cover); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "imprint", (const char **) x_meta->imprint); + if (ret != MOBI_SUCCESS) { goto cleanup; } + ret = mobi_xml_write_xmeta(writer, "review", (const char **) x_meta->review); + if (ret != MOBI_SUCCESS) { goto cleanup; } + /* */ + ret = mobi_xml_write_opfmeta(writer, (const OPFmeta **) opf.metadata->meta); + if (ret != MOBI_SUCCESS) { goto cleanup; } + //xml_ret = xmlTextWriterEndElement(writer); + //if (xml_ret < 0) { goto cleanup; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { goto cleanup; } + /* */ + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "manifest"); + if (xml_ret < 0) { goto cleanup; } + ret = 
mobi_xml_write_manifest(writer, rawml); + if (ret != MOBI_SUCCESS) { goto cleanup; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { goto cleanup; } + /* */ + ret = mobi_xml_write_spine(writer, rawml); + if (ret != MOBI_SUCCESS) { goto cleanup; } + /* */ + if (opf.guide) { + xml_ret = xmlTextWriterStartElement(writer, BAD_CAST "guide"); + if (xml_ret < 0) { goto cleanup; } + ret = mobi_xml_write_reference(writer, (const OPFreference **) opf.guide->reference); + if (ret != MOBI_SUCCESS) { goto cleanup; } + xml_ret = xmlTextWriterEndElement(writer); + if (xml_ret < 0) { goto cleanup; } + } + xml_ret = xmlTextWriterEndDocument(writer); + if (xml_ret < 0) { goto cleanup; } + + xmlFreeTextWriter(writer); + const char *opf_xml = (const char *) buf->content; + mobi_opf_add_to_rawml(opf_xml, rawml); + xmlBufferFree(buf); + mobi_free_opf(&opf); + /* cleanup function for the XML library */ + xmlCleanupParser(); + return MOBI_SUCCESS; + +cleanup: + xmlFreeTextWriter(writer); + xmlBufferFree(buf); + mobi_free_opf(&opf); + xmlCleanupParser(); + debug_print("%s\n", "XML writing failed"); + return MOBI_XML_ERR; +} diff --git a/src/opf.h b/src/opf.h new file mode 100644 index 0000000..52b6c2d --- /dev/null +++ b/src/opf.h @@ -0,0 +1,164 @@ +/** @file opf.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ + +#ifndef libmobi_opf_h +#define libmobi_opf_h + +#include "config.h" +#include "mobi.h" + +/** @brief Maximum number of opf meta tags */ +#define OPF_META_MAX_TAGS 256 + +/** + @defgroup mobi_opf OPF handling structures + @{ + */ + +/** @brief OPF element structure + + At least one identifier must have an id specified, + so it can be referenced from the package unique-identifier attribute. 
+ */ +typedef struct { + char *value; /**< element value */ + char *id; /**< id attribute */ + char *scheme; /**< opf:scheme (optional) */ +} OPFidentifier; + +/** @brief OPF element structure + + Also applies to element + */ +typedef struct { + char *value; /**< element value */ + char *file_as; /**< opf:file-as attribute (optional) */ + char *role; /**< opf:role attribute (optional) */ +} OPFcreator; + +/** @brief OPF element structure */ +typedef struct { + char *value; /**< element value */ + char *basic_code; /**< BASICCode attribute (optional, non-standard) */ +} OPFsubject; + +/** @brief OPF element structure + + Format: YYYY[-MM[-DD]] + */ +typedef struct { + char *value; /**< element value */ + char *event; /**< opf:event attribute (optional) */ +} OPFdate; + +/** @brief OPF element structure */ +typedef struct { + OPFcreator **contributor; /**< element (optional) */ + OPFcreator **creator; /**< element (optional) */ + OPFidentifier **identifier; /**< element (required) */ + OPFsubject **subject; /**< element (optional) */ + OPFdate **date; /**< element (optional) */ + char **description; /**< element (optional) */ + char **language; /**< element (required) */ + char **publisher; /**< element (optional) */ + char **rights; /**< element (optional) */ + char **source; /**< element (optional) */ + char **title; /**< element (required) */ + char **type; /**< element (optional) */ +} OPFdcmeta; + +/** @brief OPF element structure */ +typedef struct { + char *value; /**< element value */ + char *currency; /**< currency attribute */ +} OPFsrp; + +/** @brief OPF element structure */ +typedef struct { + OPFsrp **srp; /**< element */ + char **adult; /**< element */ + char **default_lookup_index; /**< element */ + char **dict_short_name; /**< element */ + char **dictionary_in_lang; /**< element */ + char **dictionary_out_lang; /**< element */ + char **embedded_cover; /**< element */ + char **imprint; /**< element */ + char **review; /**< element */ +} OPFxmeta; + 
+/** @brief OPF element structure */ +typedef struct { + char *name; /**< name attribute (required) */ + char *content; /**< content attribute (required) */ +} OPFmeta; + +/** @brief OPF element structure */ +typedef struct { + OPFmeta **meta; /**< element (optional) */ + OPFdcmeta *dc_meta; /**< element */ + OPFxmeta *x_meta; /**< element */ +} OPFmetadata; + +/** @brief OPF element structure */ +typedef struct { + char *id; /**< id attribute (required) */ + char *href; /**< href attribute (required) */ + char *media_type; /**< media-type attribute (required) */ +} OPFitem; + +/** @brief OPF element structure */ +typedef struct { + OPFitem **item; /**< element */ +} OPFmanifest; + +/** @brief OPF element structure */ +typedef struct { + char *toc; /**< toc attribute (required) */ + char **itemref; /**< element */ +} OPFspine; + +/** @brief OPF tag structure */ +typedef struct { + char *type; /**< type attribute (required) */ + char *title; /**< title attribute */ + char *href; /**< href attribute (required) */ +} OPFreference; + +/** @brief OPF element structure */ +typedef struct { + OPFreference **reference; /**< element tag */ +} OPFguide; + +/** @brief OPF element structure */ +typedef struct { + //char *uid; /**< */ + OPFmetadata *metadata; /**< (required) */ + OPFmanifest *manifest; /**< (required) */ + OPFspine *spine; /**< (required) */ + OPFguide *guide; /**< (optional) */ +} OPF; + +/** @brief NCX index entry structure */ +typedef struct { + size_t id; /**< Sequential id */ + char *text; /**< Entry text content */ + char *target; /**< Entry target reference */ + size_t level; /**< Entry level */ + size_t parent; /**< Entry parent */ + size_t first_child; /**< First child id */ + size_t last_child; /**< Last child id */ +} NCX; +/** @} */ + + +MOBI_RET mobi_build_opf(MOBIRawml *rawml, const MOBIData *m); +MOBI_RET mobi_build_ncx(MOBIRawml *rawml, const OPF *opf); + +#endif diff --git a/src/parse_rawml.c b/src/parse_rawml.c new file mode 100644 index 
0000000..ef97212 --- /dev/null +++ b/src/parse_rawml.c @@ -0,0 +1,1656 @@ +/** @file parse_rawml.c + * @brief Functions for parsing rawml markup + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ + +#include +#include +#include +#include "parse_rawml.h" +#include "util.h" +#include "opf.h" +#include "index.h" +#include "debug.h" + + +/** + @brief Convert kindle:pos:fid:x:off:y to offset in rawml raw text file + + @param[in] rawml MOBIRawml parsed records structure + @param[in] pos_fid X value of pos:fid:x + @param[in] pos_off Y value of off:y + @return Offset in rawml buffer on success, SIZE_MAX otherwise + */ +size_t mobi_get_rawlink_location(const MOBIRawml *rawml, const uint32_t pos_fid, const uint32_t pos_off) { + if (!rawml || !rawml->frag || !rawml->frag->entries ) { + debug_print("%s", "Initialization failed\n"); + return SIZE_MAX; + } + if (pos_fid >= rawml->frag->entries_count) { + debug_print("%s", "pos_fid not found\n"); + return SIZE_MAX; + } + const MOBIIndexEntry *entry = &rawml->frag->entries[pos_fid]; + const size_t insert_position = strtoul(entry->label, NULL, 10); + size_t file_offset = insert_position + pos_off; + return file_offset; +} + +/** + @brief Find first occurrence of attribute to be replaced in KF7 html + + It searches for filepos and recindex attributes + + @param[in,out] result MOBIResult structure will be filled with found data + @param[in] data_start Beginning of the memory area to search in + @param[in] data_end End of the memory area to search in + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_search_links_kf7(MOBIResult *result, const unsigned char *data_start, const unsigned char *data_end) { + if (!result) { + debug_print("Result structure is null%s", "\n"); + return MOBI_PARAM_ERR; + } + result->start = result->end = NULL; + *(result->value) = '\0'; + if (!data_start
|| !data_end) { + debug_print("Data is null%s", "\n"); + return MOBI_PARAM_ERR; + } + const char *needle1 = "filepos="; + const char *needle2 = "recindex="; + const size_t needle1_length = strlen(needle1); + const size_t needle2_length = strlen(needle2); + const size_t needle_length = max(needle1_length,needle2_length); + if (data_start + needle_length > data_end) { + return MOBI_SUCCESS; + } + unsigned char *data = (unsigned char *) data_start; + unsigned char last_border = '>'; + const unsigned char tag_open = '<'; + const unsigned char tag_close = '>'; + while (data <= data_end) { + if (*data == tag_open || *data == tag_close) { + last_border = *data; + } + if (data + needle_length <= data_end && + (memcmp(data, needle1, needle1_length) == 0 || + memcmp(data, needle2, needle2_length) == 0)) { + /* found match */ + if (last_border != tag_open) { + /* opening char not found, not an attribute */ + data += needle_length; + continue; + } + /* go to attribute beginning */ + while (data >= data_start && !isspace(*data) && *data != tag_open) { + data--; + } + result->start = ++data; + /* now go forward */ + int i = 0; + while (data <= data_end && !isspace(*data) && *data != tag_close && i < MOBI_ATTRVALUE_MAXSIZE) { + result->value[i++] = (char) *data++; + } + /* self closing tag '/>' */ + if (*(data - 1) == '/' && *data == '>') { + --data; --i; + } + result->end = data; + result->value[i] = '\0'; + return MOBI_SUCCESS; + } + data++; + } + return MOBI_SUCCESS; +} + +/** + @brief Find first occurrence of markup attribute with given string + + @param[in,out] result MOBIResult structure will be filled with found data + @param[in] data_start Beginning of the memory area to search in + @param[in] data_end End of the memory area to search in + @param[in] type Type of data (T_HTML or T_CSS) + @param[in] needle String to find + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_search_markup(MOBIResult *result, const unsigned char *data_start, const
unsigned char *data_end, const MOBIFiletype type, const char *needle) { + if (!result) { + debug_print("Result structure is null%s", "\n"); + return MOBI_PARAM_ERR; + } + result->start = result->end = NULL; + *(result->value) = '\0'; + if (!data_start || !data_end) { + debug_print("Data is null%s", "\n"); + return MOBI_PARAM_ERR; + } + size_t needle_length = strlen(needle); + if (needle_length > MOBI_ATTRNAME_MAXSIZE) { + debug_print("Attribute too long: %zu\n", needle_length); + return MOBI_PARAM_ERR; + } + if (data_start + needle_length > data_end) { + return MOBI_SUCCESS; + } + unsigned char *data = (unsigned char *) data_start; + unsigned char last_border = '>'; + unsigned char tag_open; + unsigned char tag_close; + if (type == T_CSS) { + tag_open = '{'; + tag_close = '}'; + } else { + tag_open = '<'; + tag_close = '>'; + } + while (data <= data_end) { + if (*data == tag_open || *data == tag_close) { + last_border = *data; + } + if (data + needle_length <= data_end && memcmp(data, needle, needle_length) == 0) { + /* found match */ + if (last_border != tag_open) { + /* opening char not found, not an attribute */ + data += needle_length; + continue; + } + /* go to attribute value beginning */ + while (data >= data_start && !isspace(*data) && *data != tag_open && *data != '=' && *data != '(') { + data--; + } + result->is_url = (*data == '('); + result->start = ++data; + /* now go forward */ + int i = 0; + while (data <= data_end && !isspace(*data) && *data != tag_close && *data != ')' && i < MOBI_ATTRVALUE_MAXSIZE) { + result->value[i++] = (char) *data++; + } + /* self closing tag '/>' */ + if (*(data - 1) == '/' && *data == '>') { + --data; --i; + } + result->end = data; + result->value[i] = '\0'; + return MOBI_SUCCESS; + } + data++; + } + return MOBI_SUCCESS; +} + +/** + @brief Find first occurrence of attribute part to be replaced in KF8 html/css + + It searches for "kindle:" value in attributes + + @param[in,out] result MOBIResult structure will be filled with
found data + @param[in] data_start Beginning of the memory area to search in + @param[in] data_end End of the memory area to search in + @param[in] type Type of data (T_HTML or T_CSS) + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_search_links_kf8(MOBIResult *result, const unsigned char *data_start, const unsigned char *data_end, const MOBIFiletype type) { + return mobi_search_markup(result, data_start, data_end, type, "kindle:"); +} + +/** + @brief Get value and offset of the first found attribute with given name + + @param[in,out] value String value of the attribute, will be filled by the function, zero length if not found + @param[in] data Data to search in + @param[in] size Data size + @param[in] attribute Attribute name + @param[in] only_quoted Require the value to be quoted if true, allow no quotes (eg. filepos=00001) if false + @return Offset from the beginning of the data, SIZE_MAX if not found + */ +size_t mobi_get_attribute_value(char *value, const unsigned char *data, const size_t size, const char *attribute, bool only_quoted) { + if (!data) { + debug_print("Data is null%s", "\n"); + return SIZE_MAX; + } + size_t length = size; + size_t attr_length = strlen(attribute); + if (attr_length > MOBI_ATTRNAME_MAXSIZE) { + debug_print("Attribute too long: %zu\n", attr_length); + return SIZE_MAX; + } + char attr[MOBI_ATTRNAME_MAXSIZE + 2]; + strcpy(attr, attribute); + strcat(attr, "="); + attr_length++; + if (size < attr_length) { + return SIZE_MAX; + } + /* FIXME: search may start inside tag, so it is a safer option */ + unsigned char last_border = '\0'; + do { + if (*data == '<' || *data == '>') { + last_border = *data; + } + if (length > attr_length + 1 && memcmp(data, attr, attr_length) == 0) { + /* found match */ + size_t offset = size - length; + if (last_border == '>') { + /* We are in tag contents */ + data += attr_length; + length -= attr_length - 1; + continue; + } + /* previous character should be white space or opening 
tag */ + if (offset > 0) { + if (data[-1] != '<' && !isspace(data[-1])) { + data += attr_length; + length -= attr_length - 1; + continue; + } + } + /* now go forward */ + data += attr_length; + length -= attr_length; + unsigned char separator; + if (*data != '\'' && *data != '"') { + if (only_quoted) { + continue; + } + separator = ' '; + } else { + separator = *data; + data++; + length--; + } + size_t j; + for (j = 0; j < MOBI_ATTRVALUE_MAXSIZE && length && *data != separator; j++) { + *value++ = (char) *data++; + length--; + } + *value = '\0'; + /* return offset to the beginning of the attribute value string */ + return size - length - j; + } + data++; + } while (--length); + value[0] = '\0'; + return SIZE_MAX; +} + +/** + @brief Get offset of the given value of an "aid" attribute in a given part + + @param[in] html MOBIPart html part + @param[in] aid String value of "aid" attribute + @return Offset from the beginning of the html part data, SIZE_MAX on failure + */ +size_t mobi_get_aid_offset(const MOBIPart *html, const char *aid) { + size_t length = html->size; + const char *data = (char *) html->data; + const size_t aid_length = strlen(aid); + const size_t attr_length = 5; /* "aid='" length */ + do { + if (length > (aid_length + attr_length) && memcmp(data, "aid=", attr_length - 1) == 0) { + data += attr_length; + length -= attr_length; + if (memcmp(data, aid, aid_length) == 0) { + if (data[aid_length] == '\'' || data[aid_length] == '"') { + return html->size - length; + } + } + } + data++; + } while (--length); + return SIZE_MAX; +} + +/** + @brief Convert kindle:pos:fid:x:off:y to skeleton part number and offset from the beginning of the part + + @param[in,out] file_number Will be set to file number value + @param[in,out] offset Offset from the beginning of the skeleton part + @param[in] rawml MOBIRawml parsed records structure + @param[in] pos_fid X value of pos:fid:x + @param[in] pos_off Y value of off:y + @return MOBI_RET status code (on success
MOBI_SUCCESS) + */ +MOBI_RET mobi_get_offset_by_posoff(uint32_t *file_number, size_t *offset, const MOBIRawml *rawml, const size_t pos_fid, const size_t pos_off) { + if (!rawml || !rawml->frag || !rawml->frag->entries || + !rawml->skel || !rawml->skel->entries) { + debug_print("%s", "Initialization failed\n"); + return MOBI_INIT_FAILED; + } + MOBI_RET ret; + if (pos_fid >= rawml->frag->entries_count) { + debug_print("Entry for pos:fid:%zu doesn't exist\n", pos_fid); + return MOBI_DATA_CORRUPT; + } + const MOBIIndexEntry *entry = &rawml->frag->entries[pos_fid]; + if (entry == NULL) { + debug_print("Fragment entry for pos:fid:%zu not found\n", pos_fid); + return MOBI_DATA_CORRUPT; + } + *offset = strtoul(entry->label, NULL, 10); + uint32_t file_nr; + ret = mobi_get_indxentry_tagvalue(&file_nr, entry, INDX_TAG_FRAG_FILE_NR); + if (ret != MOBI_SUCCESS) { + return ret; + } + if (file_nr >= rawml->skel->entries_count) { + debug_print("Entry for skeleton part no %u doesn't exist\n", file_nr); + return MOBI_DATA_CORRUPT; + + } + const MOBIIndexEntry *skel_entry = &rawml->skel->entries[file_nr]; + uint32_t skel_position; + ret = mobi_get_indxentry_tagvalue(&skel_position, skel_entry, INDX_TAG_SKEL_POSITION); + if (ret != MOBI_SUCCESS) { + return ret; + } + *offset -= skel_position; + *offset += pos_off; + *file_number = file_nr; + return MOBI_SUCCESS; +} + +/** + @brief Get value of the closest "aid" attribute following given offset in a given part + + @param[in,out] aid String value of "aid" attribute + @param[in] html MOBIPart html part + @param[in] offset Offset from the beginning of the part data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_aid_by_offset(char *aid, const MOBIPart *html, const size_t offset) { + if (!aid || !html) { + debug_print("Parameter error (aid (%p), html (%p)\n", aid, (void *) html); + return MOBI_PARAM_ERR; + } + if (offset > html->size) { + debug_print("Parameter error: offset (%zu) > part size (%zu)\n", 
offset, html->size); + return MOBI_PARAM_ERR; + } + const unsigned char *data = html->data; + data += offset; + size_t length = html->size - offset + 1; + + size_t off = mobi_get_attribute_value(aid, data, length, "aid", true); + if (off == SIZE_MAX) { + return MOBI_DATA_CORRUPT; + } + return MOBI_SUCCESS; +} + +/** + @brief Get value of the closest "id" attribute following given offset in a given part + + @param[in,out] id String value of "id" attribute + @param[in] html MOBIPart html part + @param[in] offset Offset from the beginning of the part data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_id_by_offset(char *id, const MOBIPart *html, const size_t offset) { + if (!id || !html) { + debug_print("Parameter error (id (%p), html (%p)\n", id, (void *) html); + return MOBI_PARAM_ERR; + } + if (offset > html->size) { + debug_print("Parameter error: offset (%zu) > part size (%zu)\n", offset, html->size); + return MOBI_PARAM_ERR; + } + const unsigned char *data = html->data; + data += offset; + size_t length = html->size - offset + 1; + + size_t off = mobi_get_attribute_value(id, data, length, "id", true); + if (off == SIZE_MAX) { + id[0] = '\0'; + //return MOBI_DATA_CORRUPT; + } + return MOBI_SUCCESS; +} + +/** + @brief Convert kindle:pos:fid:x:off:y to html file number and closest "aid" attribute following the position + + @param[in,out] file_number Will be set to file number value + @param[in,out] aid String value of "aid" attribute + @param[in] rawml MOBIRawml parsed records structure + @param[in] pos_fid X value of pos:fid:x + @param[in] pos_off Y value of off:y + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_aid_by_posoff(uint32_t *file_number, char *aid, const MOBIRawml *rawml, const size_t pos_fid, const size_t pos_off) { + size_t offset; + MOBI_RET ret = mobi_get_offset_by_posoff(file_number, &offset, rawml, pos_fid, pos_off); + if (ret != MOBI_SUCCESS) { + return MOBI_DATA_CORRUPT; + } + 
const MOBIPart *html = mobi_get_part_by_uid(rawml, *file_number); + if (html == NULL) { + return MOBI_DATA_CORRUPT; + } + ret = mobi_get_aid_by_offset(aid, html, offset); + if (ret != MOBI_SUCCESS) { + return MOBI_DATA_CORRUPT; + } + return MOBI_SUCCESS; +} + +/** + @brief Convert kindle:pos:fid:x:off:y to html file number and closest "id" attribute following the position + + @param[in,out] file_number Will be set to file number value + @param[in,out] id String value of "id" attribute + @param[in] rawml MOBIRawml parsed records structure + @param[in] pos_fid X value of pos:fid:x + @param[in] pos_off Y value of off:y + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_id_by_posoff(uint32_t *file_number, char *id, const MOBIRawml *rawml, const size_t pos_fid, const size_t pos_off) { + size_t offset; + MOBI_RET ret = mobi_get_offset_by_posoff(file_number, &offset, rawml, pos_fid, pos_off); + if (ret != MOBI_SUCCESS) { + return MOBI_DATA_CORRUPT; + } + const MOBIPart *html = mobi_get_part_by_uid(rawml, *file_number); + if (html == NULL) { + return MOBI_DATA_CORRUPT; + } + ret = mobi_get_id_by_offset(id, html, offset); + if (ret != MOBI_SUCCESS) { + return MOBI_DATA_CORRUPT; + } + return MOBI_SUCCESS; +} + +/** + @brief Parse resource records (images, fonts etc), determine their type, link to rawml + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @param[in,out] rawml Structure rawml->resources will be filled with parsed resources metadata and linked records data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_reconstruct_resources(const MOBIData *m, MOBIRawml *rawml) { + size_t first_res_seqnumber = mobi_get_first_resource_record(m); + if (first_res_seqnumber == MOBI_NOTSET) { + /* search all records */ + first_res_seqnumber = 0; + } + const MOBIPdbRecord *curr_record = mobi_get_record_by_seqnumber(m, first_res_seqnumber); + if (curr_record == NULL) { + debug_print("First resource record not 
found at %zu\n", first_res_seqnumber); + return MOBI_DATA_CORRUPT; + } + rawml->resources = calloc(1, sizeof(MOBIPart)); + if (rawml->resources == NULL) { + debug_print("%s", "Memory allocation for resources part failed\n"); + return MOBI_MALLOC_FAILED; + } + MOBIPart *curr_part = rawml->resources; + size_t i = 0; + int parts_count = 0; + while (curr_record != NULL) { + const MOBIFiletype filetype = mobi_determine_resource_type(curr_record); + if (filetype == T_UNKNOWN) { + curr_record = curr_record->next; + i++; + continue; + } + if (filetype == T_BREAK) { + break; + } + if (parts_count > 0) { + curr_part->next = calloc(1, sizeof(MOBIPart)); + if (curr_part->next == NULL) { + debug_print("%s", "Memory allocation for flow part failed\n"); + return MOBI_MALLOC_FAILED; + } + curr_part = curr_part->next; + } + + curr_part->data = curr_record->data; + curr_part->size = curr_record->size; + + MOBI_RET ret; + if (filetype == T_FONT) { + ret = mobi_add_font_resource(curr_part); + if (ret != MOBI_SUCCESS) { + printf("Decoding font resource failed\n"); + return ret; + } + } else if (filetype == T_AUDIO) { + ret = mobi_add_audio_resource(curr_part); + if (ret != MOBI_SUCCESS) { + printf("Decoding audio resource failed\n"); + return ret; + } + } else if (filetype == T_VIDEO) { + ret = mobi_add_video_resource(curr_part); + printf("Decoding video resource failed\n"); + if (ret != MOBI_SUCCESS) { + return ret; + } + } else { + curr_part->type = filetype; + } + + curr_part->uid = i; + curr_part->next = NULL; + curr_record = curr_record->next; + i++; + parts_count++; + } + if (parts_count == 0) { + free(rawml->resources); + rawml->resources = NULL; + } + return MOBI_SUCCESS; +} + +/** + @brief Parse Replica Print ebook (azw4). Extract pdf. + @todo Parse remaining data from the file + + @param[in,out] pdf Memory area will be filled with extracted pdf data + @param[in] text Raw decompressed text to be parsed + @param[in,out] length Text length. 
Will be updated with pdf_length on return + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_process_replica(unsigned char *pdf, const char *text, size_t *length) { + MOBI_RET ret = MOBI_SUCCESS; + MOBIBuffer *buf = buffer_init_null(*length); + if (buf == NULL) { + return MOBI_MALLOC_FAILED; + } + buf->data = (unsigned char*) text; + buf->offset = 12; + size_t pdf_offset = buffer_get32(buf); /* offset 12 */ + size_t pdf_length = buffer_get32(buf); /* 16 */ + if (pdf_length > *length) { + debug_print("PDF size from replica header too large: %zu", pdf_length); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + buf->offset = pdf_offset; + buffer_getraw(pdf, buf, pdf_length); + ret = buf->error; + buffer_free_null(buf); + *length = pdf_length; + return ret; +} + +/** + @brief Parse raw text into flow parts + + @param[in,out] rawml Structure rawml->flow will be filled with parsed flow text parts + @param[in] text Raw decompressed text to be parsed + @param[in] length Text length + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_reconstruct_flow(MOBIRawml *rawml, const char *text, const size_t length) { + /* KF8 */ + if (rawml->fdst != NULL) { + rawml->flow = calloc(1, sizeof(MOBIPart)); + if (rawml->flow == NULL) { + debug_print("%s", "Memory allocation for flow part failed\n"); + return MOBI_MALLOC_FAILED; + } + /* split text into fdst structure parts */ + MOBIPart *curr = rawml->flow; + size_t i = 0; + const size_t section_count = rawml->fdst->fdst_section_count; + while (i < section_count) { + if (i > 0) { + curr->next = calloc(1, sizeof(MOBIPart)); + if (curr->next == NULL) { + debug_print("%s", "Memory allocation for flow part failed\n"); + return MOBI_MALLOC_FAILED; + } + curr = curr->next; + } + const uint32_t section_start = rawml->fdst->fdst_section_starts[i]; + const uint32_t section_end = rawml->fdst->fdst_section_ends[i]; + const size_t section_length = section_end - section_start; + unsigned char 
*section_data = malloc(section_length); + if (section_data == NULL) { + debug_print("%s", "Memory allocation failed\n"); + return MOBI_MALLOC_FAILED; + } + memcpy(section_data, (text + section_start), section_length); + curr->uid = i; + curr->data = section_data; + curr->type = mobi_determine_flowpart_type(rawml, i); + curr->size = section_length; + curr->next = NULL; + i++; + } + } else { + /* No FDST or FDST parts count = 1 */ + /* single flow part */ + rawml->flow = calloc(1, sizeof(MOBIPart)); + if (rawml->flow == NULL) { + debug_print("%s", "Memory allocation for flow part failed\n"); + return MOBI_MALLOC_FAILED; + } + MOBIPart *curr = rawml->flow; + size_t section_length = 0; + MOBIFiletype section_type = T_HTML; + unsigned char *section_data; + /* check if raw text is Print Replica */ + if (memcmp(text, REPLICA_MAGIC, 4) == 0) { + debug_print("%s", "Print Replica book\n"); + /* print replica */ + unsigned char *pdf = malloc(length); + section_length = length; + section_type = T_PDF; + const MOBI_RET ret = mobi_process_replica(pdf, text, §ion_length); + if (ret != MOBI_SUCCESS) { + free(pdf); + return ret; + } + section_data = malloc(section_length); + if (section_data == NULL) { + debug_print("%s", "Memory allocation failed\n"); + free(pdf); + return MOBI_MALLOC_FAILED; + } + memcpy(section_data, pdf, section_length); + free(pdf); + } else { + /* text data */ + section_length = strlen(text); + section_data = malloc(section_length); + if (section_data == NULL) { + debug_print("%s", "Memory allocation failed\n"); + return MOBI_MALLOC_FAILED; + } + memcpy(section_data, text, section_length); + } + curr->uid = 0; + curr->data = section_data; + curr->type = section_type; + curr->size = section_length; + curr->next = NULL; + } + return MOBI_SUCCESS; +} + +/** + @brief Parse raw html into html parts. 
Use index entries if present to parse file + + @param[in,out] rawml Structure rawml->markup will be filled with reconstructed html parts + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_reconstruct_parts(MOBIRawml *rawml) { + MOBI_RET ret; + if (rawml->flow == NULL) { + debug_print("%s", "Flow structure not initialized\n"); + return MOBI_INIT_FAILED; + } + /* take first part, xhtml */ + MOBIBuffer *buf = buffer_init_null(rawml->flow->size); + buf->data = rawml->flow->data; + rawml->markup = calloc(1, sizeof(MOBIPart)); + if (rawml->markup == NULL) { + debug_print("%s", "Memory allocation for markup part failed\n"); + buffer_free_null(buf); + return MOBI_MALLOC_FAILED; + } + MOBIPart *curr = rawml->markup; + /* not skeleton data, just copy whole part to markup */ + if (rawml->skel == NULL) { + unsigned char *data = malloc(buf->maxlen); + if (data == NULL) { + debug_print("%s", "Memory allocation failed\n"); + buffer_free_null(buf); + return MOBI_MALLOC_FAILED; + } + memcpy(data, buf->data, buf->maxlen); + curr->uid = 0; + curr->size = buf->maxlen; + curr->data = data; + curr->type = rawml->flow->type; + curr->next = NULL; + buffer_free_null(buf); + return MOBI_SUCCESS; + } + /* parse skeleton data */ + size_t i = 0, j = 0; + while (i < rawml->skel->entries_count) { + const MOBIIndexEntry *entry = &rawml->skel->entries[i]; + uint32_t fragments_count; + ret = mobi_get_indxentry_tagvalue(&fragments_count, entry, INDX_TAG_SKEL_COUNT); + if (ret != MOBI_SUCCESS) { + buffer_free_null(buf); + return ret; + } + uint32_t skel_position; + ret = mobi_get_indxentry_tagvalue(&skel_position, entry, INDX_TAG_SKEL_POSITION); + if (ret != MOBI_SUCCESS) { + buffer_free_null(buf); + return ret; + } + uint32_t skel_length; + ret = mobi_get_indxentry_tagvalue(&skel_length, entry, INDX_TAG_SKEL_LENGTH); + if (ret != MOBI_SUCCESS) { + buffer_free_null(buf); + return ret; + } + debug_print("%zu\t%s\t%i\t%i\t%i\n", i, entry->label, fragments_count, skel_position, 
skel_length); + char *skel_text = malloc(skel_length + 1); + buf->offset = skel_position; + buffer_getstring(skel_text, buf, skel_length); + while (fragments_count--) { + entry = &rawml->frag->entries[j]; + uint32_t insert_position = (uint32_t) strtoul(entry->label, NULL, 10); + insert_position -= skel_position; + uint32_t cncx_offset; + ret = mobi_get_indxentry_tagvalue(&cncx_offset, entry, INDX_TAG_FRAG_AID_CNCX); + if (ret != MOBI_SUCCESS) { + free(skel_text); + buffer_free_null(buf); + return ret; + } + uint32_t file_number; + ret = mobi_get_indxentry_tagvalue(&file_number, entry, INDX_TAG_FRAG_FILE_NR); + if (ret != MOBI_SUCCESS) { + free(skel_text); + buffer_free_null(buf); + return ret; + } + uint32_t seq_number; + ret = mobi_get_indxentry_tagvalue(&seq_number, entry, INDX_TAG_FRAG_SEQUENCE_NR); + if (ret != MOBI_SUCCESS) { + free(skel_text); + buffer_free_null(buf); + return ret; + } + uint32_t frag_position; + ret = mobi_get_indxentry_tagvalue(&frag_position, entry, INDX_TAG_FRAG_POSITION); + if (ret != MOBI_SUCCESS) { + free(skel_text); + buffer_free_null(buf); + return ret; + } + uint32_t frag_length; + ret = mobi_get_indxentry_tagvalue(&frag_length, entry, INDX_TAG_FRAG_LENGTH); + if (ret != MOBI_SUCCESS) { + free(skel_text); + buffer_free_null(buf); + return ret; + } + /* FIXME: aid_text is unused */ + const MOBIPdbRecord *cncx_record = rawml->frag->cncx_record; + char *aid_text = mobi_get_cncx_string(cncx_record, cncx_offset); + if (file_number != i) { + debug_print("%s", "SKEL part number and fragment sequence number don't match\n"); + free(aid_text); + free(skel_text); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + debug_print("posfid[%zu]\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n", j, insert_position, cncx_offset, aid_text, file_number, seq_number, frag_position, frag_length); + free(aid_text); + char *tmp = realloc(skel_text, (skel_length + frag_length + 1)); + if (tmp == NULL) { + free(skel_text); + buffer_free_null(buf); + return 
MOBI_MALLOC_FAILED; + } + skel_text = tmp; + size_t skel_end_length = skel_length - insert_position; + char skel_text_end[skel_end_length + 1]; + strncpy(skel_text_end, skel_text + insert_position, skel_end_length); + skel_text_end[skel_end_length] = '\0'; + skel_text[insert_position] = '\0'; + buffer_appendstring(skel_text, buf, frag_length); + skel_length += frag_length; + strncat(skel_text, skel_text_end, skel_length + 1); + j++; + + } + if (i > 0) { + curr->next = calloc(1, sizeof(MOBIPart)); + if (curr->next == NULL) { + debug_print("%s", "Memory allocation for markup part failed\n"); + buffer_free_null(buf); + return MOBI_MALLOC_FAILED; + } + curr = curr->next; + } + curr->uid = i; + curr->size = skel_length; + curr->data = (unsigned char *) skel_text; + curr->type = T_HTML; + curr->next = NULL; + i++; + } + buffer_free_null(buf); + return MOBI_SUCCESS; +} + +/** + @brief Skan html part and build arrays of filepos link target offsets and attribute offsets. + + If links or offsets array is null it will not be filled with data + + @param[in,out] links MOBIArray structure for link target offsets array + @param[in] part MOBIPart html part structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_filepos_array(MOBIArray *links, const MOBIPart *part) { + if (!links || !part) { + return MOBI_INIT_FAILED; + } + size_t offset = 0; + size_t size = part->size; + unsigned char *data = part->data; + while (true) { + char val[MOBI_ATTRVALUE_MAXSIZE]; + size -= offset; + data += offset; + offset = mobi_get_attribute_value(val, data, size, "filepos", false); + if (offset == SIZE_MAX) { break; } + size_t filepos = strtoul(val, NULL, 10); + if (filepos > UINT32_MAX) { + return MOBI_DATA_CORRUPT; + } + MOBI_RET ret = array_insert(links, (uint32_t) filepos); + if (ret != MOBI_SUCCESS) { + return ret; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Skan ncx part and build array of filepos link target offsets. 
+ + @param[in,out] links MOBIArray structure for link target offsets array + @param[in] part MOBIPart html part structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_ncx_filepos_array(MOBIArray *links, const MOBIPart *part) { + if (!links || !part) { + return MOBI_PARAM_ERR; + } + while ((part = part->next) != NULL) { + if (part->type == T_NCX) { + size_t offset = 0; + size_t size = part->size; + unsigned char *data = part->data; + while (true) { + char val[MOBI_ATTRVALUE_MAXSIZE]; + size -= offset; + data += offset; + offset = mobi_get_attribute_value(val, data, size, "src", false); + if (offset == SIZE_MAX) { break; } + /* part00000.html#0000000000 */ + uint32_t filepos = 0; + sscanf(val + 15, "%10u", &filepos); + MOBI_RET ret = array_insert(links, filepos); + if (ret != MOBI_SUCCESS) { + return ret; + } + } + } + } + return MOBI_SUCCESS; +} + +/** + @brief Replace kindle:pos link with html href + + @param[in,out] link Memory area which will be filled with "part00000.html#customid", including quotation marks + @param[in] rawml Structure rawml will be filled with reconstructed parts and resources + @param[in] value String kindle:pos:fid:0000:off:0000000000, without quotation marks + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_posfid_to_link(char *link, const MOBIRawml *rawml, const char *value) { + /* "kindle:pos:fid:0000:off:0000000000" */ + /* extract fid and off */ + value += 15; /* strlen("kindle:pos:fid:"); */ + char str_fid[4 + 1]; + strncpy(str_fid, value, 4); + str_fid[4] = '\0'; + char str_off[10 + 1]; + value += 9; /* strlen("0001:off:"); */ + strncpy(str_off, value, 10); + str_off[10] = '\0'; + + /* get file number and id value */ + uint32_t pos_off, pos_fid; + MOBI_RET ret = mobi_base32_decode(&pos_off, str_off); + if (ret != MOBI_SUCCESS) { + return ret; + } + ret = mobi_base32_decode(&pos_fid, str_fid); + if (ret != MOBI_SUCCESS) { + return ret; + } + uint32_t part_id; + char 
id[MOBI_ATTRVALUE_MAXSIZE + 1]; + ret = mobi_get_id_by_posoff(&part_id, id, rawml, pos_fid, pos_off); + if (ret != MOBI_SUCCESS) { + return ret; + } + /* FIXME: pos_off == 0 means top of file? */ + if (pos_off) { + snprintf(link, MOBI_ATTRVALUE_MAXSIZE, "\"part%05u.html#%s\"", part_id, id); + } else { + snprintf(link, MOBI_ATTRVALUE_MAXSIZE, "\"part%05u.html\"", part_id); + } + return MOBI_SUCCESS; +} + +/** + @brief Replace kindle:flow link with html href + + @param[in,out] link Memory area which will be filled with "part00000.ext", including quotation marks + @param[in] rawml Structure rawml will be filled with reconstructed parts and resources + @param[in] value String kindle:flow:0000?mime=type, without quotation marks + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_flow_to_link(char *link, const MOBIRawml *rawml, const char *value) { + /* "kindle:flow:0000?mime=" */ + value += 12; /* strlen("kindle:flow:"); */ + char str_fid[4 + 1]; + strncpy(str_fid, value, 4); + str_fid[4] = '\0'; + + /* get file number */ + uint32_t part_id; + MOBI_RET ret = mobi_base32_decode(&part_id, str_fid); + if (ret != MOBI_SUCCESS) { + return ret; + } + MOBIPart *flow = mobi_get_flow_by_uid(rawml, part_id); + MOBIFileMeta meta = mobi_get_filemeta_by_type(flow->type); + char *extension = meta.extension; + snprintf(link, MOBI_ATTRVALUE_MAXSIZE, "\"flow%05u.%s\"", part_id, extension); + return MOBI_SUCCESS; +} + +/** + @brief Replace kindle:embed link with html href + + @param[in,out] link Memory area which will be filled with "resource00000.ext", including quotation marks + @param[in] rawml Structure rawml will be filled with reconstructed parts and resources + @param[in] value String kindle:embed:0000?mime=type, with optional quotation marks + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_embed_to_link(char *link, const MOBIRawml *rawml, const char *value) { + /* "kindle:embed:0000?mime=" */ + /* skip quotation marks or spaces 
*/ + while (*value == '"' || *value == '\'' || isspace(*value)) { + value++; + } + value += strlen("kindle:embed:"); + char str_fid[4 + 1]; + strncpy(str_fid, value, 4); + str_fid[4] = '\0'; + + /* get file number */ + uint32_t part_id; + MOBI_RET ret = mobi_base32_decode(&part_id, str_fid); + if (ret != MOBI_SUCCESS) { + return ret; + } + part_id--; + MOBIPart *resource = mobi_get_resource_by_uid(rawml, part_id); + MOBIFileMeta meta = mobi_get_filemeta_by_type(resource->type); + char *extension = meta.extension; + snprintf(link, MOBI_ATTRVALUE_MAXSIZE, "\"resource%05u.%s\"", part_id, extension); + return MOBI_SUCCESS; +} + +/** + @brief Structure for links reconstruction. + + Linked list of Fragment structures forms whole document part + */ +typedef struct MOBIFragment { + unsigned char *fragment; /**< Fragment data */ + size_t size; /**< Fragment size */ + bool is_malloc; /**< Is it needed to free this fragment or is it just an alias to part data */ + struct MOBIFragment *next; /**< Link to next fragment */ +} MOBIFragment; + +/** + @brief Allocate fragment, fill with data, append to linked list + + @param[in] curr Last fragment in linked list + @param[in] fragment Fragment data + @param[in] size Size data + @param[in] is_malloc is_maloc data + @return Fragment structure filled with data + */ +static MOBIFragment * mobi_list_add(MOBIFragment *curr, unsigned char *fragment, const size_t size, const bool is_malloc) { + curr->next = calloc(1, sizeof(MOBIFragment)); + if (curr->next == NULL) { + return NULL; + } + MOBIFragment *next = curr->next; + next->fragment = fragment; + next->size = size; + next->is_malloc = is_malloc; + return next; +} + +/** + @brief Allocate fragment, fill with data and return + + @param[in] fragment Fragment data + @param[in] size Size data + @param[in] is_malloc is_maloc data + @return Fragment structure filled with data + */ +static MOBIFragment * mobi_list_init(unsigned char *fragment, const size_t size, const bool is_malloc) { + 
MOBIFragment *curr = calloc(1, sizeof(MOBIFragment)); + if (curr == NULL) { + return NULL; + } + curr->fragment = fragment; + curr->size = size; + curr->is_malloc = is_malloc; + return curr; +} + +/** + @brief Delete fragment from linked list + + @param[in] curr Fragment to be deleted + @return Next fragment in the linked list or NULL if absent + */ +static MOBIFragment * mobi_list_del(MOBIFragment *curr) { + MOBIFragment *del = curr; + curr = curr->next; + if (del->is_malloc) { + free(del->fragment); + } + free(del); + return curr; +} + +/** + @brief Replace offset-links with html-links in KF8 markup + + @param[in,out] rawml Structure rawml will be filled with reconstructed parts and resources + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_reconstruct_links_kf8(const MOBIRawml *rawml) { + MOBIResult result; + + typedef struct NEWData { + size_t part_group; + size_t part_uid; + MOBIFragment *list; + size_t size; + struct NEWData *next; + } NEWData; + + NEWData *partdata = NULL; + NEWData *curdata = NULL; + MOBIPart *parts[] = { + rawml->markup, /* html files */ + rawml->flow->next /* css, skip first unparsed html part */ + }; + size_t i; + for (i = 0; i < 2; i++) { + MOBIPart *part = parts[i]; + while (part) { + unsigned char *data_in = part->data; + result.start = part->data; + const unsigned char *data_end = part->data + part->size; + MOBIFragment *first = NULL; + MOBIFragment *curr = NULL; + size_t part_size = 0; + while (true) { + mobi_search_links_kf8(&result, result.start, data_end, part->type); + if (result.start == NULL) { + break; + } + char *value = (char *) result.value; + unsigned char *data_cur = result.start; + char *target = NULL; + if (data_cur < data_in) { + return MOBI_DATA_CORRUPT; + } + size_t size = (size_t) (data_cur - data_in); + char link[MOBI_ATTRVALUE_MAXSIZE + 1]; + if ((target = strstr(value, "kindle:pos:fid:")) != NULL) { + /* "kindle:pos:fid:0001:off:0000000000" */ + /* replace link with 
href="part00000.html#00" */ + MOBI_RET ret = mobi_posfid_to_link(link, rawml, target); + if (ret != MOBI_SUCCESS) { + return ret; + } + } else if ((target = strstr(value, "kindle:flow:")) != NULL) { + /* kindle:flow:0000?mime=text/css */ + /* replace link with href="flow00000.ext" */ + MOBI_RET ret = mobi_flow_to_link(link, rawml, target); + if (ret != MOBI_SUCCESS) { + return ret; + } + } else if ((target = strstr(value, "kindle:embed:")) != NULL) { + /* kindle:embed:0000?mime=image/jpg */ + /* replace link with href="resource00000.ext" */ + MOBI_RET ret = mobi_embed_to_link(link, rawml, target); + if (ret != MOBI_SUCCESS) { + return ret; + } + } + if (target) { + /* first chunk */ + if (!curr) { + curr = mobi_list_init(data_in, size, false); + if (curr == NULL) { + return MOBI_MALLOC_FAILED; + } + first = curr; + } else { + curr = mobi_list_add(curr, data_in, size, false); + if (curr == NULL) { + return MOBI_MALLOC_FAILED; + } + } + part_size += curr->size; + /* second chunk */ + /* strip quotes if is_url */ + curr = mobi_list_add(curr, + (unsigned char *) strdup(link + result.is_url), + strlen(link) - 2 * result.is_url, true); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + part_size += curr->size; + data_in = result.end; + } + } + if (first && first->fragment) { + /* last chunk */ + if (part->data + part->size < data_in) { + return MOBI_DATA_CORRUPT; + } + size_t size = (size_t) (part->data + part->size - data_in); + curr = mobi_list_add(curr, data_in, size, false); + if (curr == NULL) { + return MOBI_MALLOC_FAILED; + } + part_size += curr->size; + /* save */ + if (!curdata) { + curdata = calloc(1, sizeof(NEWData)); + partdata = curdata; + } else { + curdata->next = calloc(1, sizeof(NEWData)); + curdata = curdata->next; + } + curdata->part_group = i; + curdata->part_uid = part->uid; + curdata->list = first; + curdata->size = part_size; + } + part = part->next; + } + } + /* now update parts */ + for 
(i = 0; i < 2; i++) { + MOBIPart *part = parts[i]; + while (part) { + if (partdata && part->uid == partdata->part_uid && i == partdata->part_group) { + unsigned char *new_data = malloc((size_t) partdata->size); + unsigned char *data_out = new_data; + MOBIFragment *fragdata = partdata->list; + while (fragdata) { + memcpy(data_out, fragdata->fragment, fragdata->size); + data_out += fragdata->size; + fragdata = mobi_list_del(fragdata); + } + free(part->data); + part->data = new_data; + part->size = (size_t) partdata->size; + NEWData *partused = partdata; + partdata = partdata->next; + free(partused); + } + part = part->next; + } + } + return MOBI_SUCCESS; +} + +/** + @brief Replace offset-links with html-links in KF7 markup + + @param[in,out] rawml Structure rawml will be filled with reconstructed parts and resources + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_reconstruct_links_kf7(const MOBIRawml *rawml) { + MOBIResult result; + MOBIArray *links = array_init(25); + if (links == NULL) { + return MOBI_MALLOC_FAILED; + } + MOBIPart *part = rawml->markup; + /* get array of link target offsets */ + MOBI_RET ret = mobi_get_filepos_array(links, part); + if (ret != MOBI_SUCCESS) { + array_free(links); + return ret; + } + ret = mobi_get_ncx_filepos_array(links, part); + if (ret != MOBI_SUCCESS) { + array_free(links); + return ret; + } + if (array_size(links) == 0) { + debug_print("%s\n", "No filepos links found"); + array_free(links); + return MOBI_SUCCESS; + } + array_sort(links, true); + /* build MOBIResult list */ + unsigned char *data_in = part->data; + result.start = part->data; + const unsigned char *data_end = part->data + part->size; + MOBIFragment *first = NULL; + MOBIFragment *curr = NULL; + size_t new_size = 0; + size_t i = 0; + while (true) { + mobi_search_links_kf7(&result, result.start, data_end); + if (result.start == NULL) { + break; + } + char *attribute = (char *) result.value; + unsigned char *data_cur = result.start; + char 
link[MOBI_ATTRVALUE_MAXSIZE]; + const char *numbers = "0123456789"; + char *value = strpbrk(attribute, numbers); + if (value == NULL) { + debug_print("Unknown link target: %s\n", attribute); + while (first) { + first = mobi_list_del(first); + } + return MOBI_DATA_CORRUPT; + } + size_t target; + switch (attribute[0]) { + case 'f': + /* filepos=0000000000 */ + /* replace link with href="#0000000000" */ + target = strtoul(value, NULL, 10); + snprintf(link, MOBI_ATTRVALUE_MAXSIZE, "href=\"#%010u\"", (uint32_t)target); + break; + case 'r': + /* recindex="00000" */ + /* replace link with src="resource00000.ext" */ + target = strtoul(value, NULL, 10); + if (target > 0) { + target--; + } + MOBIFiletype filetype = mobi_get_resourcetype_by_uid(rawml, target); + MOBIFileMeta filemeta = mobi_get_filemeta_by_type(filetype); + snprintf(link, MOBI_ATTRVALUE_MAXSIZE, "src=\"resource%05u.%s\"", (uint32_t) target, filemeta.extension); + break; + default: + debug_print("Unknown link target: %s\n", attribute); + while (first) { + first = mobi_list_del(first); + } + return MOBI_DATA_CORRUPT; + break; + } + + /* insert chunks from links array */ + while (i < links->size) { + const uint32_t offset = links->data[i]; + unsigned char *data_links = part->data + offset; + if (data_links > result.start) { + break; + } + /* first chunk */ + if (data_links < data_in) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_DATA_CORRUPT; + } + size_t chunk_size = (size_t) (data_links - data_in); + if (!curr) { + curr = mobi_list_init(data_in, chunk_size, false); + if (curr == NULL) { + return MOBI_MALLOC_FAILED; + } + first = curr; + } else { + curr = mobi_list_add(curr, data_in, chunk_size, false); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + } + data_in = data_links; + new_size += curr->size; + /* second chunk */ + char anchor[MOBI_ATTRVALUE_MAXSIZE]; + snprintf(anchor, MOBI_ATTRVALUE_MAXSIZE, "", offset); + curr = 
mobi_list_add(curr, + (unsigned char *) strdup(anchor), + strlen(anchor), true); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + new_size += curr->size; + i++; + } + if (data_cur < data_in) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_DATA_CORRUPT; + } + size_t size = (size_t) (data_cur - data_in); + if (!curr) { + curr = mobi_list_init(data_in, size, false); + if (curr == NULL) { + return MOBI_MALLOC_FAILED; + } + first = curr; + } else { + curr = mobi_list_add(curr, data_in, size, false); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + } + new_size += curr->size; + /* second chunk */ + curr = mobi_list_add(curr, + (unsigned char *) strdup(link), + strlen(link), true); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + new_size += curr->size; + data_in = result.end; + } + /* insert remaining chunks from links array */ + while (i < links->size) { + const uint32_t offset = links->data[i]; + unsigned char *data_links = part->data + offset; + if (data_links > part->data + part->size) { + break; + } + /* first chunk */ + if (data_links < data_in) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_DATA_CORRUPT; + } + size_t chunk_size = (size_t) (data_links - data_in); + if (!curr) { + curr = mobi_list_init(data_in, chunk_size, false); + if (curr == NULL) { + return MOBI_MALLOC_FAILED; + } + first = curr; + } else { + curr = mobi_list_add(curr, data_in, chunk_size, false); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + } + data_in = data_links; + new_size += curr->size; + /* second chunk */ + char anchor[MOBI_ATTRVALUE_MAXSIZE]; + snprintf(anchor, MOBI_ATTRVALUE_MAXSIZE, "", offset); + curr = mobi_list_add(curr, + (unsigned char *) strdup(anchor), + strlen(anchor), true); + if 
(curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + new_size += curr->size; + i++; + } + array_free(links); + if (first && first->fragment) { + /* last chunk */ + if (part->data + part->size < data_in) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_DATA_CORRUPT; + } + size_t size = (size_t) (part->data + part->size - data_in); + curr = mobi_list_add(curr, data_in, size, false); + if (curr == NULL) { + while (first) { + first = mobi_list_del(first); + } + return MOBI_MALLOC_FAILED; + } + new_size += curr->size; + /* save */ + unsigned char *new_data = malloc((size_t) new_size); + unsigned char *data_out = new_data; + MOBIFragment *fragdata = first; + while (fragdata) { + memcpy(data_out, fragdata->fragment, fragdata->size); + data_out += fragdata->size; + fragdata = mobi_list_del(fragdata); + } + free(part->data); + part->data = new_data; + part->size = (size_t) new_size; + } + return MOBI_SUCCESS; +} + +/** + @brief Replace offset-links with html-links + + @param[in,out] rawml Structure rawml will be filled with reconstructed parts and resources + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_reconstruct_links(const MOBIRawml *rawml) { + if (rawml == NULL) { + debug_print("%s\n", "Rawml not initialized\n"); + return MOBI_INIT_FAILED; + } + MOBI_RET ret; + if (rawml->version != MOBI_NOTSET && rawml->version >= 8) { + /* kf8 gimnastics */ + ret = mobi_reconstruct_links_kf8(rawml); + } else { + /* kf7 format and older */ + ret = mobi_reconstruct_links_kf7(rawml); + } + return ret; +} + +/** + @brief Parse raw records into html flow parts, markup parts, resources and indices + + @param[in,out] rawml Structure rawml will be filled with reconstructed parts and resources + @param[in] m MOBIData structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_rawml(MOBIRawml *rawml, const MOBIData *m) { + + MOBI_RET ret; + if (m == NULL) { + 
debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; + } + if (rawml == NULL) { + return MOBI_INIT_FAILED; + } + + /* Get maximal size of text data */ + const size_t maxlen = mobi_get_text_maxsize(m); + char *text = malloc(maxlen + 1); + if (text == NULL) { + debug_print("%s", "Memory allocation failed\n"); + return MOBI_MALLOC_FAILED; + } + /* Extract text records, unpack, merge and copy it to text string */ + size_t length = maxlen; + ret = mobi_get_rawml(m, text, &length); + if (ret != MOBI_SUCCESS) { + debug_print("%s", "Error parsing text\n"); + free(text); + return MOBI_MALLOC_FAILED; + } + /* Work on utf-8 encoded text */ + if (memcmp(text, REPLICA_MAGIC, 4) != 0 && mobi_is_cp1252(m)) { + /* extreme case in which each input character is converted + to 3-byte utf-8 sequence */ + size_t out_length = 3 * length + 1; + char *out_text = malloc(out_length); + if (out_text == NULL) { + debug_print("%s", "Memory allocation failed\n"); + free(text); + return MOBI_MALLOC_FAILED; + } + ret = mobi_cp1252_to_utf8(out_text, text, &out_length, length); + free(text); + if (ret != MOBI_SUCCESS || out_length == 0) { + debug_print("%s", "conversion from cp1252 to utf8 failed\n"); + free(out_text); + return ret; + } + text = malloc(out_length + 1); + if (text == NULL) { + debug_print("%s", "Memory allocation failed\n"); + free(out_text); + return MOBI_MALLOC_FAILED; + } + memcpy(text, out_text, out_length); + free(out_text); + text[out_length] = '\0'; + length = out_length; + } + + if (mobi_exists_fdst(m)) { + /* Skip parsing if section count less than 1 */ + if (m->mh->fdst_section_count && *m->mh->fdst_section_count > 1) { + ret = mobi_parse_fdst(m, rawml); + if (ret != MOBI_SUCCESS) { + free(text); + return ret; + } + } + } + ret = mobi_reconstruct_flow(rawml, text, length); + free(text); + if (ret != MOBI_SUCCESS) { + return ret; + } + ret = mobi_reconstruct_resources(m, rawml); + if (ret != MOBI_SUCCESS) { + return ret; + } + const size_t 
offset = mobi_get_kf8offset(m); + /* skeleton index */ + if (mobi_exists_skel_indx(m) && mobi_exists_frag_indx(m)) { + const size_t indx_record_number = *m->mh->skeleton_index + offset; + /* to be freed in mobi_free_rawml */ + MOBIIndx *skel_meta = mobi_init_indx(); + ret = mobi_parse_index(m, skel_meta, indx_record_number); + if (ret != MOBI_SUCCESS) { + return ret; + } + rawml->skel = skel_meta; + } + + /* fragment index */ + if (mobi_exists_frag_indx(m)) { + MOBIIndx *frag_meta = mobi_init_indx(); + const size_t indx_record_number = *m->mh->fragment_index + offset; + ret = mobi_parse_index(m, frag_meta, indx_record_number); + if (ret != MOBI_SUCCESS) { + return ret; + } + rawml->frag = frag_meta; + } + + /* guide index */ + if (mobi_exists_guide_indx(m)) { + MOBIIndx *guide_meta = mobi_init_indx(); + const size_t indx_record_number = *m->mh->guide_index + offset; + ret = mobi_parse_index(m, guide_meta, indx_record_number); + if (ret != MOBI_SUCCESS) { + return ret; + } + rawml->guide = guide_meta; + } + + /* ncx index */ + if (mobi_exists_ncx(m)) { + MOBIIndx *ncx_meta = mobi_init_indx(); + const size_t indx_record_number = *m->mh->ncx_index + offset; + ret = mobi_parse_index(m, ncx_meta, indx_record_number); + if (ret != MOBI_SUCCESS) { + return ret; + } + rawml->ncx = ncx_meta; + } + + /* orth index */ + /* FIXME: works only for old non-KF8 formats */ + if (rawml->version < 8 && mobi_exists_orth(m)) { + MOBIIndx *orth_meta = mobi_init_indx(); + const size_t indx_record_number = *m->mh->orth_index + offset; + ret = mobi_parse_index(m, orth_meta, indx_record_number); + if (ret != MOBI_SUCCESS) { + return ret; + } + rawml->orth = orth_meta; + } + + ret = mobi_reconstruct_parts(rawml); + if (ret != MOBI_SUCCESS) { + return ret; + } +#ifdef USE_LIBXML2 + ret = mobi_build_opf(rawml, m); + if (ret != MOBI_SUCCESS) { + return ret; + } +#endif + ret = mobi_reconstruct_links(rawml); + if (ret != MOBI_SUCCESS) { + return ret; + } + return MOBI_SUCCESS; +} + diff --git 
a/src/parse_rawml.h b/src/parse_rawml.h new file mode 100644 index 0000000..a790466 --- /dev/null +++ b/src/parse_rawml.h @@ -0,0 +1,33 @@ +/** @file parse_rawml.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ + +#ifndef mobi_parse_rawml_h +#define mobi_parse_rawml_h + +#include "config.h" +#include "mobi.h" + +#define MOBI_ATTRNAME_MAXSIZE 100 /**< Maximum length of tag attribute name, like "href" */ +#define MOBI_ATTRVALUE_MAXSIZE 100 /**< Maximum length of tag attribute value */ + +/** + @brief Result data returned by mobi_search_links_kf7() and mobi_search_links_kf8() + */ +typedef struct { + unsigned char *start; /**< Beginning data to be replaced */ + unsigned char *end; /**< End of data to be replaced */ + char value[MOBI_ATTRVALUE_MAXSIZE + 1]; /**< Attribute value */ + bool is_url; /**< True if value is part of css url attribute */ +} MOBIResult; + +MOBI_RET mobi_get_id_by_posoff(uint32_t *file_number, char *id, const MOBIRawml *rawml, const size_t pos_fid, const size_t pos_off); +MOBI_RET mobi_search_markup(MOBIResult *result, const unsigned char *data_start, const unsigned char *data_end, const MOBIFiletype type, const char *needle); + +#endif diff --git a/src/read.c b/src/read.c index 761cca5..cb37a95 100644 --- a/src/read.c +++ b/src/read.c @@ -1,41 +1,54 @@ -// -// read.c -// mobi -// -// Created by Bartek on 26.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file read.c + * @brief Functions for reading and parsing of MOBI document + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ + #include +#include #include #include "read.h" +#include "util.h" +#include "index.h" +#include "debug.h" -int mobi_load_pdbheader(MOBIData *m, FILE *file) { - MOBIBuffer *buf; +/** + @brief Read palm database header from file into MOBIData structure (MOBIPdbHeader) + + @param[in,out] m MOBIData structure to be filled with read data + @param[in] file Filedescriptor to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_load_pdbheader(MOBIData *m, FILE *file) { if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } if (!file) { - return MOBI_ERROR; + return MOBI_FILE_NOT_FOUND; } - buf = buffer_init(PALMDB_HEADER_LEN); + MOBIBuffer *buf = buffer_init(PALMDB_HEADER_LEN); if (buf == NULL) { - return MOBI_ERROR; + return MOBI_MALLOC_FAILED; } - size_t len = fread(buf->data, 1, PALMDB_HEADER_LEN, file); + const size_t len = fread(buf->data, 1, PALMDB_HEADER_LEN, file); if (len != PALMDB_HEADER_LEN) { buffer_free(buf); - return MOBI_ERROR; + return MOBI_DATA_CORRUPT; } m->ph = calloc(1, sizeof(MOBIPdbHeader)); if (m->ph == NULL) { - printf("Memory allocation for pdb header failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for pdb header failed\n"); + return MOBI_MALLOC_FAILED; } - // parse header + /* parse header */ buffer_getstring(m->ph->name, buf, PALMDB_NAME_SIZE_MAX); - m->ph->name[PALMDB_NAME_SIZE_MAX] = '\0'; m->ph->attributes = buffer_get16(buf); m->ph->version = buffer_get16(buf); m->ph->ctime = buffer_get32(buf); @@ -45,9 +58,7 @@ int mobi_load_pdbheader(MOBIData *m, FILE *file) { m->ph->appinfo_offset = buffer_get32(buf); m->ph->sortinfo_offset = buffer_get32(buf); buffer_getstring(m->ph->type, buf, 4); - m->ph->type[4] = '\0'; buffer_getstring(m->ph->creator, buf, 4); - m->ph->creator[4] = '\0'; m->ph->uid = buffer_get32(buf); m->ph->next_rec = buffer_get32(buf); 
m->ph->rec_count = buffer_get16(buf); @@ -55,157 +66,176 @@ int mobi_load_pdbheader(MOBIData *m, FILE *file) { return MOBI_SUCCESS; } -int mobi_load_reclist(MOBIData *m, FILE *file) { +/** + @brief Read list of database records from file into MOBIData structure (MOBIPdbRecord) + + @param[in,out] m MOBIData structure to be filled with read data + @param[in] file Filedescriptor to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_load_reclist(MOBIData *m, FILE *file) { if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } if (!file) { - printf("File not ready\n"); - return MOBI_ERROR; + debug_print("%s", "File not ready\n"); + return MOBI_FILE_NOT_FOUND; } - int i; - MOBIBuffer *buf; - MOBIPdbRecord *curr; m->rec = calloc(1, sizeof(MOBIPdbRecord)); if (m->rec == NULL) { - printf("Memory allocation for pdb record failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for pdb record failed\n"); + return MOBI_MALLOC_FAILED; } - curr = m->rec; - for (i = 0; i < m->ph->rec_count; i++) { - buf = buffer_init(PDB_RECORD_INFO_SIZE); + MOBIPdbRecord *curr = m->rec; + for (int i = 0; i < m->ph->rec_count; i++) { + MOBIBuffer *buf = buffer_init(PALMDB_RECORD_INFO_SIZE); if (buf == NULL) { - return MOBI_ERROR; + return MOBI_MALLOC_FAILED; } - size_t len = fread(buf->data, 1, PDB_RECORD_INFO_SIZE, file); - if (len != PDB_RECORD_INFO_SIZE) { + const size_t len = fread(buf->data, 1, PALMDB_RECORD_INFO_SIZE, file); + if (len != PALMDB_RECORD_INFO_SIZE) { buffer_free(buf); - return MOBI_ERROR; + return MOBI_DATA_CORRUPT; } if (i > 0) { curr->next = calloc(1, sizeof(MOBIPdbRecord)); if (curr->next == NULL) { - printf("Memory allocation for pdb record failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for pdb record failed\n"); + return MOBI_MALLOC_FAILED; } curr = curr->next; } curr->offset = 
buffer_get32(buf); curr->attributes = buffer_get8(buf); - uint8_t h = buffer_get8(buf); - uint16_t l = buffer_get16(buf); - curr->uid = h << 16 | l; + const uint8_t h = buffer_get8(buf); + const uint16_t l = buffer_get16(buf); + curr->uid = (uint32_t) h << 16 | l; curr->next = NULL; buffer_free(buf); } return MOBI_SUCCESS; } -int mobi_load_recdata(MOBIData *m, FILE *file) { - MOBIPdbRecord *curr, *next; - int ret; +/** + @brief Read record data and size from file into MOBIData structure (MOBIPdbRecord) + + @param[in,out] m MOBIData structure to be filled with read data + @param[in] file Filedescriptor to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_load_rec(MOBIData *m, FILE *file) { + MOBI_RET ret; if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } - curr = m->rec; + MOBIPdbRecord *curr = m->rec; while (curr != NULL) { + MOBIPdbRecord *next; size_t size; if (curr->next != NULL) { next = curr->next; size = next->offset - curr->offset; } else { fseek(file, 0, SEEK_END); - size = ftell(file) - curr->offset; + long diff = ftell(file) - curr->offset; + if (diff <= 0) { + debug_print("Wrong record size: %li\n", diff); + return MOBI_DATA_CORRUPT; + } + size = (size_t) diff; next = NULL; } curr->size = size; - ret = mobi_load_rec(curr, file); - if (ret == MOBI_ERROR) { - printf("Error loading record uid %i data\n", curr->uid); + ret = mobi_load_recdata(curr, file); + if (ret != MOBI_SUCCESS) { + debug_print("Error loading record uid %i data\n", curr->uid); mobi_free_rec(m); - return MOBI_ERROR; + return ret; } curr = next; } return MOBI_SUCCESS; } -int mobi_load_rec(MOBIPdbRecord *rec, FILE *file) { - size_t len; - int ret; - ret = fseek(file, rec->offset, SEEK_SET); +/** + @brief Read record data from file into MOBIPdbRecord structure + + @param[in,out] rec MOBIPdbRecord structure to be filled with read data + 
@param[in] file Filedescriptor to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_load_recdata(MOBIPdbRecord *rec, FILE *file) { + const int ret = fseek(file, rec->offset, SEEK_SET); if (ret != 0) { - printf("Record %i not found\n", rec->uid); - return MOBI_ERROR; + debug_print("Record %i not found\n", rec->uid); + return MOBI_DATA_CORRUPT; } rec->data = malloc(rec->size); if (rec->data == NULL) { - printf("Memory allocation for pdb record data failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for pdb record data failed\n"); + return MOBI_MALLOC_FAILED; } - len = fread(rec->data, 1, rec->size, file); + const size_t len = fread(rec->data, 1, rec->size, file); if (len < rec->size) { - printf("Truncated data in record %i\n", rec->uid); - rec->size = len; - char *ptr = realloc(rec->data, len); - if (ptr) { - rec->data = ptr; - } + debug_print("Truncated data in record %i\n", rec->uid); + return MOBI_DATA_CORRUPT; } return MOBI_SUCCESS; } -int mobi_parse_extheader(MOBIData *m, MOBIBuffer *buf) { - size_t saved_maxlen; - char exth_magic[4]; - size_t exth_length; - size_t rec_count; +/** + @brief Parse EXTH header from Record 0 into MOBIData structure (MOBIExthHeader) + + @param[in,out] m MOBIData structure to be filled with parsed data + @param[in] buf MOBIBuffer buffer to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_extheader(MOBIData *m, MOBIBuffer *buf) { if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } + char exth_magic[5]; buffer_getstring(exth_magic, buf, 4); - exth_length = buffer_get32(buf); - rec_count = buffer_get32(buf); + const size_t exth_length = buffer_get32(buf); + const size_t rec_count = buffer_get32(buf); if (strncmp(exth_magic, EXTH_MAGIC, 4) != 0 || exth_length + buf->offset + 8 > buf->maxlen || rec_count == 0) { - return 
MOBI_ERROR; + return MOBI_DATA_CORRUPT; } - saved_maxlen = buf->maxlen; + const size_t saved_maxlen = buf->maxlen; buf->maxlen = exth_length + buf->offset - 8; - m->eh = calloc(1, sizeof(MOBIExtHeader)); + m->eh = calloc(1, sizeof(MOBIExthHeader)); if (m->eh == NULL) { - printf("Memory allocation for EXTH header failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for EXTH header failed\n"); + return MOBI_MALLOC_FAILED; } - int i; - MOBIExtHeader *curr; - curr = m->eh; - for (i = 0; i < rec_count; i++) { + MOBIExthHeader *curr = m->eh; + for (size_t i = 0; i < rec_count; i++) { if (i > 0) { - curr->next = calloc(1, sizeof(MOBIExtHeader)); + curr->next = calloc(1, sizeof(MOBIExthHeader)); if (curr->next == NULL) { - printf("Memory allocation for EXTH header failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for EXTH header failed\n"); + return MOBI_MALLOC_FAILED; } curr = curr->next; } - curr->uid = buffer_get32(buf); - // data size = record size minus 8 bytes for uid and size + curr->tag = buffer_get32(buf); + /* data size = record size minus 8 bytes for uid and size */ curr->size = buffer_get32(buf) - 8; if (curr->size == 0) { - printf("Skip record %i, data too short\n", curr->uid); + debug_print("Skip record %i, data too short\n", curr->tag); continue; } curr->data = malloc(curr->size); if (curr->data == NULL) { - printf("Memory allocation for EXTH record %i failed\n", curr->uid); + debug_print("Memory allocation for EXTH record %i failed\n", curr->tag); mobi_free_eh(m); - return MOBI_ERROR; + return MOBI_MALLOC_FAILED; } buffer_getraw(curr->data, buf, curr->size); curr->next = NULL; @@ -214,344 +244,525 @@ int mobi_parse_extheader(MOBIData *m, MOBIBuffer *buf) { return MOBI_SUCCESS; } -int mobi_parse_mobiheader(MOBIData *m, MOBIBuffer *buf) { - size_t saved_maxlen; +/** + @brief Parse MOBI header from Record 0 into MOBIData structure (MOBIMobiHeader) + + @param[in,out] m MOBIData structure to be filled with parsed data + 
@param[in] buf MOBIBuffer buffer to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_mobiheader(MOBIData *m, MOBIBuffer *buf) { + int isKF8 = 0; if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } m->mh = calloc(1, sizeof(MOBIMobiHeader)); if (m->mh == NULL) { - printf("Memory allocation for MOBI header failed\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation for MOBI header failed\n"); + return MOBI_MALLOC_FAILED; } buffer_getstring(m->mh->mobi_magic, buf, 4); - m->mh->mobi_magic[4] = '\0'; - buffer_copy32(&m->mh->header_length, buf); + buffer_dup32(&m->mh->header_length, buf); if (strcmp(m->mh->mobi_magic, MOBI_MAGIC) != 0 || m->mh->header_length == NULL) { - printf("MOBI header not found\n"); - mobi_free_mh(m); - return MOBI_ERROR; + debug_print("%s", "MOBI header not found\n"); + mobi_free_mh(m->mh); + m->mh = NULL; + return MOBI_DATA_CORRUPT; } - saved_maxlen = buf->maxlen; - // read only declared MOBI header length (curr offset minus 8 already read bytes) + const size_t saved_maxlen = buf->maxlen; + /* read only declared MOBI header length (curr offset minus 8 already read bytes) */ buf->maxlen = *m->mh->header_length + buf->offset - 8; - buffer_copy32(&m->mh->mobi_type, buf); - buffer_copy32(&m->mh->text_encoding, buf); - buffer_copy32(&m->mh->uid, buf); - buffer_copy32(&m->mh->file_version, buf); - buffer_copy32(&m->mh->orth_index, buf); - buffer_copy32(&m->mh->infl_index, buf); - buffer_copy32(&m->mh->names_index, buf); - buffer_copy32(&m->mh->keys_index, buf); - buffer_copy32(&m->mh->extra0_index, buf); - buffer_copy32(&m->mh->extra1_index, buf); - buffer_copy32(&m->mh->extra2_index, buf); - buffer_copy32(&m->mh->extra3_index, buf); - buffer_copy32(&m->mh->extra4_index, buf); - buffer_copy32(&m->mh->extra5_index, buf); - buffer_copy32(&m->mh->non_text_index, buf); - 
buffer_copy32(&m->mh->full_name_offset, buf); - buffer_copy32(&m->mh->full_name_length, buf); - buffer_copy32(&m->mh->locale, buf); - buffer_copy32(&m->mh->input_lang, buf); - buffer_copy32(&m->mh->output_lang, buf); - buffer_copy32(&m->mh->min_version, buf); - buffer_copy32(&m->mh->image_index, buf); - buffer_copy32(&m->mh->huff_rec_index, buf); - buffer_copy32(&m->mh->huff_rec_count, buf); - buffer_copy32(&m->mh->huff_table_offset, buf); - buffer_copy32(&m->mh->huff_table_length, buf); - buffer_copy32(&m->mh->exth_flags, buf); - buf->offset += 32; // 32 unknown bytes - buffer_copy32(&m->mh->unknown6, buf); - buffer_copy32(&m->mh->drm_offset, buf); - buffer_copy32(&m->mh->drm_count, buf); - buffer_copy32(&m->mh->drm_size, buf); - buffer_copy32(&m->mh->drm_flags, buf); - buf->offset += 8; // 8 unknown bytes - buffer_copy16(&m->mh->first_text_index, buf); - buffer_copy16(&m->mh->last_text_index, buf); - buffer_copy32(&m->mh->unknown9, buf); - buffer_copy32(&m->mh->fcis_index, buf); - buffer_copy32(&m->mh->fcis_count, buf); - buffer_copy32(&m->mh->flis_index, buf); - buffer_copy32(&m->mh->flis_count, buf); - buffer_copy32(&m->mh->unknown10, buf); - buffer_copy32(&m->mh->unknown11, buf); - buffer_copy32(&m->mh->srcs_index, buf); - buffer_copy32(&m->mh->srcs_count, buf); - buffer_copy32(&m->mh->unknown12, buf); - buffer_copy32(&m->mh->unknown13, buf); - buf->offset += 2; // 2 byte fill - buffer_copy16(&m->mh->extra_flags, buf); - buffer_copy32(&m->mh->ncx_index, buf); - buffer_copy32(&m->mh->unknown14, buf); - buffer_copy32(&m->mh->unknown15, buf); - buffer_copy32(&m->mh->datp_index, buf); - buffer_copy32(&m->mh->unknown16, buf); - buffer_copy32(&m->mh->unknown17, buf); - buffer_copy32(&m->mh->unknown18, buf); - buffer_copy32(&m->mh->unknown19, buf); - buffer_copy32(&m->mh->unknown20, buf); + buffer_dup32(&m->mh->mobi_type, buf); + buffer_dup32(&m->mh->text_encoding, buf); + buffer_dup32(&m->mh->uid, buf); + buffer_dup32(&m->mh->version, buf); + if (m->mh->version && 
*m->mh->version == 8) { + isKF8 = 1; + } + buffer_dup32(&m->mh->orth_index, buf); + buffer_dup32(&m->mh->infl_index, buf); + buffer_dup32(&m->mh->names_index, buf); + buffer_dup32(&m->mh->keys_index, buf); + buffer_dup32(&m->mh->extra0_index, buf); + buffer_dup32(&m->mh->extra1_index, buf); + buffer_dup32(&m->mh->extra2_index, buf); + buffer_dup32(&m->mh->extra3_index, buf); + buffer_dup32(&m->mh->extra4_index, buf); + buffer_dup32(&m->mh->extra5_index, buf); + buffer_dup32(&m->mh->non_text_index, buf); + buffer_dup32(&m->mh->full_name_offset, buf); + buffer_dup32(&m->mh->full_name_length, buf); + buffer_dup32(&m->mh->locale, buf); + buffer_dup32(&m->mh->dict_input_lang, buf); + buffer_dup32(&m->mh->dict_output_lang, buf); + buffer_dup32(&m->mh->min_version, buf); + buffer_dup32(&m->mh->image_index, buf); + buffer_dup32(&m->mh->huff_rec_index, buf); + buffer_dup32(&m->mh->huff_rec_count, buf); + buffer_dup32(&m->mh->datp_rec_index, buf); + buffer_dup32(&m->mh->datp_rec_count, buf); + buffer_dup32(&m->mh->exth_flags, buf); + buf->offset += 32; /* 32 unknown bytes */ + buffer_dup32(&m->mh->unknown6, buf); + buffer_dup32(&m->mh->drm_offset, buf); + buffer_dup32(&m->mh->drm_count, buf); + buffer_dup32(&m->mh->drm_size, buf); + buffer_dup32(&m->mh->drm_flags, buf); + buf->offset += 8; /* 8 unknown bytes */ + if (isKF8) { + buffer_dup32(&m->mh->fdst_index, buf); + } else { + buffer_dup16(&m->mh->first_text_index, buf); + buffer_dup16(&m->mh->last_text_index, buf); + } + buffer_dup32(&m->mh->fdst_section_count, buf); + buffer_dup32(&m->mh->fcis_index, buf); + buffer_dup32(&m->mh->fcis_count, buf); + buffer_dup32(&m->mh->flis_index, buf); + buffer_dup32(&m->mh->flis_count, buf); + buffer_dup32(&m->mh->unknown10, buf); + buffer_dup32(&m->mh->unknown11, buf); + buffer_dup32(&m->mh->srcs_index, buf); + buffer_dup32(&m->mh->srcs_count, buf); + buffer_dup32(&m->mh->unknown12, buf); + buffer_dup32(&m->mh->unknown13, buf); + buf->offset += 2; /* 2 byte fill */ + 
buffer_dup16(&m->mh->extra_flags, buf); + buffer_dup32(&m->mh->ncx_index, buf); + if (isKF8) { + buffer_dup32(&m->mh->fragment_index, buf); + buffer_dup32(&m->mh->skeleton_index, buf); + } else { + buffer_dup32(&m->mh->unknown14, buf); + buffer_dup32(&m->mh->unknown15, buf); + } + buffer_dup32(&m->mh->datp_index, buf); + if (isKF8) { + buffer_dup32(&m->mh->guide_index, buf); + } else { + buffer_dup32(&m->mh->unknown16, buf); + } + buffer_dup32(&m->mh->unknown17, buf); + buffer_dup32(&m->mh->unknown18, buf); + buffer_dup32(&m->mh->unknown19, buf); + buffer_dup32(&m->mh->unknown20, buf); if (buf->maxlen > buf->offset) { + debug_print("Skipping %zu unknown bytes in MOBI header\n", (buf->maxlen - buf->offset)); buf->offset = buf->maxlen; } buf->maxlen = saved_maxlen; return MOBI_SUCCESS; } - -// parse -int mobi_parse_record0(MOBIData *m, size_t seqnumber) { - MOBIBuffer *buf; - MOBIPdbRecord *record0; +/** + @brief Parse Record 0 into MOBIData structure + + This function will parse MOBIRecord0Header, MOBIMobiHeader and MOBIExthHeader + + @param[in,out] m MOBIData structure to be filled with parsed data + @param[in] seqnumber Sequential number of the palm database record + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_record0(MOBIData *m, const size_t seqnumber) { + MOBI_RET ret; if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; + } + const MOBIPdbRecord *record0 = mobi_get_record_by_seqnumber(m, seqnumber); + if (record0 == NULL) { + debug_print("%s", "Record 0 not loaded\n"); + return MOBI_DATA_CORRUPT; } - record0 = mobi_get_record_by_seqnumber(m, seqnumber); - if (record0 == NULL || record0->size == 0) { - printf("Record 0 not loaded\n"); - return MOBI_ERROR; + if (record0->size < RECORD0_HEADER_LEN) { + debug_print("%s", "Record 0 too short\n"); + return MOBI_DATA_CORRUPT; } - buf = buffer_init(record0->size); + 
MOBIBuffer *buf = buffer_init_null(record0->size); if (buf == NULL) { - return MOBI_ERROR; + return MOBI_MALLOC_FAILED; } - memcpy(buf->data, record0->data, record0->size); + buf->data = record0->data; m->rh = calloc(1, sizeof(MOBIRecord0Header)); if (m->rh == NULL) { - printf("Memory allocation for record 0 header failed\n"); - return MOBI_ERROR; - } - // parse palmdoc header - m->rh->compression_type = buffer_get16(buf); - buf->offset += 2; // unused, 0 + debug_print("%s", "Memory allocation for record 0 header failed\n"); + buffer_free_null(buf); + return MOBI_MALLOC_FAILED; + } + /* parse palmdoc header */ + const uint16_t compression = buffer_get16(buf); + buf->offset += 2; // unused 2 bytes, zeroes + if ((compression != RECORD0_NO_COMPRESSION && + compression != RECORD0_PALMDOC_COMPRESSION && + compression != RECORD0_HUFF_COMPRESSION)) { + debug_print("Wrong record0 header: %c%c%c%c\n", record0->data[0], record0->data[1], record0->data[2], record0->data[3]); + buffer_free_null(buf); + free(m->rh); + m->rh = NULL; + return MOBI_DATA_CORRUPT; + } + m->rh->compression_type = compression; m->rh->text_length = buffer_get32(buf); m->rh->text_record_count = buffer_get16(buf); m->rh->text_record_size = buffer_get16(buf); m->rh->encryption_type = buffer_get16(buf); m->rh->unknown1 = buffer_get16(buf); - if (strcmp(m->ph->type, "BOOK") == 0 && strcmp(m->ph->creator, "MOBI") == 0) { - // parse mobi header - mobi_parse_mobiheader(m, buf); - // parse exth header - mobi_parse_extheader(m, buf); + if (mobi_is_mobipocket(m)) { + /* parse mobi header if present */ + ret = mobi_parse_mobiheader(m, buf); + if (ret == MOBI_SUCCESS) { + /* parse exth header if present */ + mobi_parse_extheader(m, buf); + } } - buffer_free(buf); + buffer_free_null(buf); return MOBI_SUCCESS; } -int mobi_parse_huff(MOBIHuffCdic *huffcdic, MOBIPdbRecord *record) { - MOBIBuffer *buf; - char huff_magic[5]; - size_t header_length; - buf = buffer_init(record->size); +/** + @brief Calculate the size of 
extra bytes at the end of text record + + @param[in] record MOBIPdbRecord structure containing the record + @param[in] flags Flags from MOBI header (extra_flags) + @return The size of trailing bytes, MOBI_NOTSET on failure + */ +size_t mobi_get_record_extrasize(const MOBIPdbRecord *record, const uint16_t flags) { + size_t extra_size = 0; + MOBIBuffer *buf = buffer_init_null(record->size); + if (buf == NULL) { + debug_print("%s", "Buffer init in extrasize failed\n"); + return MOBI_NOTSET; + } + buf->data = record->data; + /* set pointer at the end of the record data */ + buf->offset = buf->maxlen - 1; + for (int bit = 15; bit > 0; bit--) { + if (flags & (1 << bit)) { + /* bit is set */ + size_t len = 0; + /* size contains varlen itself and optional data */ + const uint32_t size = buffer_get_varlen_dec(buf, &len); + /* skip data */ + /* TODO: read and store in record struct */ + buf->offset -= (size - len); + extra_size += size; + } + }; + /* check bit 0 */ + if (flags & 1) { + const uint8_t b = buffer_get8(buf); + /* two first bits hold size */ + extra_size += (b & 0x3) + 1; + + } + buffer_free_null(buf); + return extra_size; +} + +/** + @brief Parse HUFF record into MOBIHuffCdic structure + + @param[in,out] huffcdic MOBIHuffCdic structure to be filled with parsed data + @param[in] record MOBIPdbRecord structure containing the record + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_huff(MOBIHuffCdic *huffcdic, const MOBIPdbRecord *record) { + MOBIBuffer *buf = buffer_init_null(record->size); if (buf == NULL) { - return MOBI_ERROR; + return MOBI_MALLOC_FAILED; } - memcpy(buf->data, record->data, record->size); + buf->data = record->data; + char huff_magic[5]; buffer_getstring(huff_magic, buf, 4); - header_length = buffer_get32(buf); + const size_t header_length = buffer_get32(buf); if (strncmp(huff_magic, HUFF_MAGIC, 4) != 0 || header_length < HUFF_HEADER_LEN) { - printf("HUFF wrong magic: %s\n", huff_magic); - buffer_free(buf); - 
return MOBI_ERROR; + debug_print("HUFF wrong magic: %s\n", huff_magic); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; } - size_t data1_offset = buffer_get32(buf); - size_t data2_offset = buffer_get32(buf); - // skip little-endian table offsets + const size_t data1_offset = buffer_get32(buf); + const size_t data2_offset = buffer_get32(buf); + /* skip little-endian table offsets */ buf->offset = data1_offset; if (buf->offset + (256 * 4) > buf->maxlen) { - printf("HUFF data1 too short\n"); - buffer_free(buf); - return MOBI_ERROR; + debug_print("%s", "HUFF data1 too short\n"); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; } - // read 256 indices from data1 big-endian + /* read 256 indices from data1 big-endian */ for (int i = 0; i < 256; i++) { huffcdic->table1[i] = buffer_get32(buf); } buf->offset = data2_offset; if (buf->offset + (64 * 4) > buf->maxlen) { - printf("HUFF data2 too short\n"); - buffer_free(buf); - return MOBI_ERROR; + debug_print("%s", "HUFF data2 too short\n"); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; } - // read 32 mincode-maxcode pairs from data2 big-endian - uint32_t mincode, maxcode; + /* read 32 mincode-maxcode pairs from data2 big-endian */ huffcdic->mincode_table[0] = 0; huffcdic->maxcode_table[0] = 0xFFFFFFFF; for (int i = 1; i < 33; i++) { - mincode = buffer_get32(buf); - maxcode = buffer_get32(buf); + const uint32_t mincode = buffer_get32(buf); + const uint32_t maxcode = buffer_get32(buf); huffcdic->mincode_table[i] = mincode << (32 - i); huffcdic->maxcode_table[i] = ((maxcode + 1) << (32 - i)) - 1; } - buffer_free(buf); + buffer_free_null(buf); return MOBI_SUCCESS; } -int mobi_parse_cdic(MOBIHuffCdic *huffcdic, MOBIPdbRecord *record, int num) { - MOBIBuffer *buf; - char cdic_magic[5]; - size_t header_length, index_count, code_length; - buf = buffer_init(record->size); +/** + @brief Parse CDIC record into MOBIHuffCdic structure + + @param[in,out] huffcdic MOBIHuffCdic structure to be filled with parsed data + 
@param[in] record MOBIPdbRecord structure containing the record + @param[in] num Number of CDIC record in a set, starting from zero + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_cdic(MOBIHuffCdic *huffcdic, const MOBIPdbRecord *record, const size_t num) { + MOBIBuffer *buf = buffer_init_null(record->size); if (buf == NULL) { - return MOBI_ERROR; + return MOBI_MALLOC_FAILED; } - memcpy(buf->data, record->data, record->size); + buf->data = record->data; + char cdic_magic[5]; buffer_getstring(cdic_magic, buf, 4); - header_length = buffer_get32(buf); + const size_t header_length = buffer_get32(buf); if (strncmp(cdic_magic, CDIC_MAGIC, 4) != 0 || header_length < CDIC_HEADER_LEN) { - printf("CDIC wrong magic: %s\n", cdic_magic); - buffer_free(buf); - return MOBI_ERROR; - } - // variables in huffcdic initialized to zero with calloc - // save initial count and length - index_count = buffer_get32(buf); - code_length = buffer_get32(buf); + debug_print("CDIC wrong magic: %s or declared header length: %zu\n", cdic_magic, header_length); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + /* variables in huffcdic initialized to zero with calloc */ + /* save initial count and length */ + size_t index_count = buffer_get32(buf); + const size_t code_length = buffer_get32(buf); if (huffcdic->code_length && huffcdic->code_length != code_length) { - printf("Warning: CDIC different code length %zu in record %i, previous was %zu\n", huffcdic->code_length, record->uid, code_length); + debug_print("Warning: CDIC different code length %zu in record %i, previous was %zu\n", huffcdic->code_length, record->uid, code_length); } if (huffcdic->index_count && huffcdic->index_count != index_count) { - printf("Warning: CDIC different index count %zu in record %i, previous was %zu\n", huffcdic->index_count, record->uid, index_count); + debug_print("Warning: CDIC different index count %zu in record %i, previous was %zu\n", huffcdic->index_count, 
record->uid, index_count); } huffcdic->code_length = code_length; huffcdic->index_count = index_count; if (index_count == 0) { - printf("CDIC index count is null"); - buffer_free(buf); - return MOBI_ERROR; + debug_print("%s", "CDIC index count is null"); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; } - // allocate memory for symbol offsets if not already allocated + /* allocate memory for symbol offsets if not already allocated */ if (num == 0) { huffcdic->symbol_offsets = malloc(index_count * sizeof(*huffcdic->symbol_offsets)); if (huffcdic->symbol_offsets == NULL) { - printf("CDIC cannot allocate memory"); - buffer_free(buf); - return MOBI_ERROR; + debug_print("%s", "CDIC cannot allocate memory"); + buffer_free_null(buf); + return MOBI_MALLOC_FAILED; } } index_count -= huffcdic->index_read; - // limit number of records read to code_length bits + /* limit number of records read to code_length bits */ if (index_count >> code_length) { index_count = (1 << code_length); } if (buf->offset + (index_count * 2) > buf->maxlen) { - printf("CDIC indices data too short\n"); - buffer_free(buf); + debug_print("%s", "CDIC indices data too short\n"); free(huffcdic->symbol_offsets); - return MOBI_ERROR; + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; } - // read i * 2 byte big-endian indices + /* read i * 2 byte big-endian indices */ while (index_count--) { huffcdic->symbol_offsets[huffcdic->index_read++] = buffer_get16(buf); } if (buf->offset + code_length > buf->maxlen) { - printf("CDIC dictionary data too short"); + debug_print("%s", "CDIC dictionary data too short\n"); free(huffcdic->symbol_offsets); - buffer_free(buf); - return MOBI_ERROR; + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; } - // copy pointer to data + /* copy pointer to data */ huffcdic->symbols[num] = record->data + CDIC_HEADER_LEN; - // free buffer - buffer_free(buf); + /* free buffer */ + buffer_free_null(buf); return MOBI_SUCCESS; } -int mobi_parse_huffdic(MOBIData *m, MOBIHuffCdic 
*huffcdic) { - MOBIPdbRecord *curr; - int ret, i = 0; +/** + @brief Parse a set of HUFF and CDIC records into MOBIHuffCdic structure + + @param[in] m MOBIData structure with loaded MOBI document + @param[in,out] huffcdic MOBIHuffCdic structure to be filled with parsed data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_huffdic(const MOBIData *m, MOBIHuffCdic *huffcdic) { + MOBI_RET ret; + const size_t offset = mobi_get_kf8offset(m); if (m->mh == NULL || m->mh->huff_rec_index == NULL) { - printf("HUFF/CDIC records metadata not found in MOBI header\n"); - return MOBI_ERROR; + debug_print("%s", "HUFF/CDIC records metadata not found in MOBI header\n"); + return MOBI_DATA_CORRUPT; } - size_t huff_rec_index = *m->mh->huff_rec_index; - size_t huff_rec_count = *m->mh->huff_rec_count; - curr = mobi_get_record_by_seqnumber(m, huff_rec_index); + const size_t huff_rec_index = *m->mh->huff_rec_index + offset; + const size_t huff_rec_count = *m->mh->huff_rec_count; + const MOBIPdbRecord *curr = mobi_get_record_by_seqnumber(m, huff_rec_index); if (curr == NULL) { - printf("HUFF record not found\n"); - return MOBI_ERROR; + debug_print("%s", "HUFF record not found\n"); + return MOBI_DATA_CORRUPT; } if (curr->size < HUFF_RECORD_MINSIZE) { - printf("HUFF record too short (%zu b)\n", curr->size); - return MOBI_ERROR; + debug_print("HUFF record too short (%zu b)\n", curr->size); + return MOBI_DATA_CORRUPT; } ret = mobi_parse_huff(huffcdic, curr); - if (ret == MOBI_ERROR) { - printf("HUFF parsing failed\n"); - return MOBI_ERROR; + if (ret != MOBI_SUCCESS) { + debug_print("%s", "HUFF parsing failed\n"); + return ret; } - //huff_rec_index++; curr = curr->next; - // allocate memory for symbols data in each CDIC record + /* allocate memory for symbols data in each CDIC record */ huffcdic->symbols = malloc((huff_rec_count - 1) * sizeof(*huffcdic->symbols)); - // get following CDIC records + /* get following CDIC records */ + size_t i = 0; while (i < 
huff_rec_count - 1) { ret = mobi_parse_cdic(huffcdic, curr, i++); - if (ret == MOBI_ERROR) { - printf("CDIC parsing failed\n"); + if (ret != MOBI_SUCCESS) { + debug_print("%s", "CDIC parsing failed\n"); free(huffcdic->symbols); - return MOBI_ERROR; + return ret; } curr = curr->next; } - return MOBI_SUCCESS; } -int mobi_load_file(MOBIData *m, FILE *file) { - int ret; + +/** + @brief Parse FDST record into MOBIRawml structure (MOBIFdst member) + + @param[in] m MOBIData structure with loaded MOBI document + @param[in,out] rawml MOBIRawml structure to be filled with parsed data + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_fdst(const MOBIData *m, MOBIRawml *rawml) { if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } - ret = mobi_load_pdbheader(m, file); + const size_t fdst_record_number = mobi_get_fdst_record_number(m); + if (fdst_record_number == MOBI_NOTSET) { + return MOBI_DATA_CORRUPT; + } + const MOBIPdbRecord *fdst_record = mobi_get_record_by_seqnumber(m, fdst_record_number); + MOBIBuffer *buf = buffer_init_null(fdst_record->size); + if (buf == NULL) { + return MOBI_MALLOC_FAILED; + } + buf->data = fdst_record->data; + char fdst_magic[5]; + buffer_getstring(fdst_magic, buf, 4); + const size_t data_offset = buffer_get32(buf); + const size_t section_count = buffer_get32(buf); + if (strncmp(fdst_magic, FDST_MAGIC, 4) != 0 || + section_count <= 1 || + section_count != *m->mh->fdst_section_count || + data_offset != 12) { + debug_print("FDST wrong magic: %s, sections count: %zu or data offset: %zu\n", fdst_magic, section_count, data_offset); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + if ((buf->maxlen - buf->offset) < section_count * 8) { + debug_print("%s", "Record FDST too short\n"); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + rawml->fdst = malloc(sizeof(MOBIFdst)); + 
rawml->fdst->fdst_section_count = section_count; + rawml->fdst->fdst_section_starts = malloc(sizeof(uint32_t) * section_count); + rawml->fdst->fdst_section_ends = malloc(sizeof(uint32_t) * section_count); + size_t i = 0; + while (i < section_count) { + rawml->fdst->fdst_section_starts[i] = buffer_get32(buf); + rawml->fdst->fdst_section_ends[i] = buffer_get32(buf); + debug_print("FDST[%zu]:\t%i\t%i\n", i, rawml->fdst->fdst_section_starts[i], rawml->fdst->fdst_section_ends[i]); + i++; + } + buffer_free_null(buf); + return MOBI_SUCCESS; +} +/** + @brief Read MOBI document from file into MOBIData structure + + @param[in,out] m MOBIData structure to be filled with read data + @param[in] file File descriptor to read from + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_load_file(MOBIData *m, FILE *file) { + MOBI_RET ret; + if (m == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; + } + ret = mobi_load_pdbheader(m, file); + if (ret != MOBI_SUCCESS) { + return ret; + } if (strcmp(m->ph->type, "BOOK") != 0 && strcmp(m->ph->type, "TEXt") != 0) { - printf("Unsupported file type: %s\n", m->ph->type); - return MOBI_ERROR; + debug_print("Unsupported file type: %s\n", m->ph->type); + return MOBI_FILE_UNSUPPORTED; } - - if (ret == MOBI_ERROR || m->ph->rec_count == 0) { - printf("No records found\n"); - return MOBI_ERROR; + if (m->ph->rec_count == 0) { + debug_print("%s", "No records found\n"); + return MOBI_DATA_CORRUPT; } ret = mobi_load_reclist(m, file); - if (ret == MOBI_ERROR) { - return MOBI_ERROR; + if (ret != MOBI_SUCCESS) { + return ret; } - ret = mobi_load_recdata(m, file); - if (ret == MOBI_ERROR) { - return MOBI_ERROR; + ret = mobi_load_rec(m, file); + if (ret != MOBI_SUCCESS) { + return ret; } ret = mobi_parse_record0(m, 0); - // if EXTH is loaded and use_kf8 flag is set parse KF8 record0 for joined mobi7/kf8 file + if (ret != MOBI_SUCCESS) { + return ret; + } + /* if EXTH is loaded and use_kf8 flag 
is set parse KF8 record0 for hybrid KF7/KF8 file */ if (m->eh && m->use_kf8) { - int boundary_rec_number; - boundary_rec_number = mobi_get_kf8boundary(m); - if (boundary_rec_number >= 0) { - // it is a joint mobi7/kf8 file + const size_t boundary_rec_number = mobi_get_kf8boundary_seqnumber(m); + if (boundary_rec_number != MOBI_NOTSET && boundary_rec_number < UINT32_MAX) { + /* it is a hybrid KF7/KF8 file */ + m->kf8_boundary_offset = (uint32_t) boundary_rec_number; m->next = mobi_init(); - // link pdb header and records data to kf8data structure + /* link pdb header and records data to KF8data structure */ m->next->ph = m->ph; m->next->rec = m->rec; - // close next loop + /* close next loop */ m->next->next = m; ret = mobi_parse_record0(m->next, boundary_rec_number + 1); - mobi_swap_mobidata(m); + if (ret != MOBI_SUCCESS) { + return ret; } + mobi_swap_mobidata(m); + } } - return ret; + return MOBI_SUCCESS; } -int mobi_load_filename(MOBIData *m, const char *path) { - FILE *file; - int ret; - file = fopen(path, "rb"); - ret = mobi_load_file(m, file); +/** + @brief Read MOBI document from a path into MOBIData structure + + @param[in,out] m MOBIData structure to be filled with read data + @param[in] path Path to a MOBI document on disk (eg. /home/me/test.mobi) + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_load_filename(MOBIData *m, const char *path) { + FILE *file = fopen(path, "rb"); + if (file == NULL) { + debug_print("%s", "File not found\n"); + return MOBI_FILE_NOT_FOUND; + } + const MOBI_RET ret = mobi_load_file(m, file); fclose(file); return ret; } diff --git a/src/read.h b/src/read.h index 5baa6fd..f9dd51e 100644 --- a/src/read.h +++ b/src/read.h @@ -1,21 +1,23 @@ -// -// read.h -// mobi -// -// Created by Bartek on 26.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file read.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. 
+ * Licensed under LGPL, either version 3, or any later. + * See + */ -#ifndef mobi_read_h -#define mobi_read_h +#ifndef libmobi_read_h +#define libmobi_read_h +#include "config.h" #include "mobi.h" #include "memory.h" -#include "util.h" -int mobi_load_pdbheader(MOBIData *m, FILE *file); -int mobi_load_reclist(MOBIData *m, FILE *file); -int mobi_load_recdata(MOBIData *m, FILE *file); -int mobi_load_rec(MOBIPdbRecord *rec, FILE *file); +MOBI_RET mobi_load_pdbheader(MOBIData *m, FILE *file); +MOBI_RET mobi_load_reclist(MOBIData *m, FILE *file); +MOBI_RET mobi_load_rec(MOBIData *m, FILE *file); +MOBI_RET mobi_load_recdata(MOBIPdbRecord *rec, FILE *file); #endif diff --git a/src/util.c b/src/util.c index 600e1e8..d4978cc 100644 --- a/src/util.c +++ b/src/util.c @@ -1,36 +1,515 @@ -// -// util.c -// mobi -// -// Created by Bartek on 08.04.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file util.c + * @brief Various helper functions + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See + */ +#include +#include +#include #include "util.h" +#include "parse_rawml.h" +#include "index.h" +#include "debug.h" -void mobi_get_fullname(MOBIData *m, char *fullname, size_t len) { +#ifdef USE_LIBXML2 +#include "opf.h" +#endif + +/** @brief Lookup table for cp1252 to utf8 encoding conversion */ +static const unsigned char cp1252_to_utf8[32][3] = { + {0xe2,0x82,0xac}, + {0}, + {0xe2,0x80,0x9a}, + {0xc6,0x92,0}, + {0xe2,0x80,0x9e}, + {0xe2,0x80,0xa6}, + {0xe2,0x80,0xa0}, + {0xe2,0x80,0xa1}, + {0xcb,0x86,0}, + {0xe2,0x80,0xb0}, + {0xc5,0xa0,0}, + {0xe2,0x80,0xb9}, + {0xc5,0x92,0}, + {0}, + {0xc5,0xbd,0}, + {0}, + {0}, + {0xe2,0x80,0x98}, + {0xe2,0x80,0x99}, + {0xe2,0x80,0x9c}, + {0xe2,0x80,0x9d}, + {0xe2,0x80,0xa2}, + {0xe2,0x80,0x93}, + {0xe2,0x80,0x94}, + {0xcb,0x9c,0}, + {0xe2,0x84,0xa2}, + {0xc5,0xa1,0}, + {0xe2,0x80,0xba}, + {0xc5,0x93,0}, + {0}, + {0xc5,0xbe,0}, + {0xc5,0xb8,0}, +}; + +/** + @brief Get libmobi version + + @return String version + */ +const char * mobi_version(void) { +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION "0.1" +#endif + return PACKAGE_VERSION; +} + +/** + @brief Convert cp1252 encoded string to utf-8 + + Maximum length of output string is 3 * (input string length) + 1 + + @param[in,out] output Output string + @param[in,out] input Input string + @param[in,out] outsize Size of the allocated output buffer, will be set to output string length on return + @param[in] insize Length of the input string. 
+ @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_cp1252_to_utf8(char *output, const char *input, size_t *outsize, const size_t insize) { + if (!output || !input) { + return MOBI_PARAM_ERR; + } + const unsigned char *in = (unsigned char *) input; + unsigned char *out = (unsigned char *) output; + const unsigned char *outend = out + *outsize; + const unsigned char *inend = in + insize; + while (*in && in < inend && out < outend) { + if (*in < 0x80) { + *out++ = *in++; + } + else if (*in < 0xa0) { + /* table lookup */ + size_t i = 0; + while (i < 3) { + unsigned char c = cp1252_to_utf8[*in - 0x80][i]; + if (c == 0) { + break; + } + *out++ = c; + i++; + } + if (i == 0) { + /* unassigned character in input */ + return MOBI_DATA_CORRUPT; + } + in++; + } + else if (*in < 0xc0) { + *out++ = 0xc2; + *out++ = *in++; + } + else { + *out++ = 0xc3; + *out++ = (*in++ & 0x3f) + 0x80; + } + } + *out = '\0'; + *outsize = (size_t) (out - (unsigned char *) output); + return MOBI_SUCCESS; +} + +/** @brief Get text encoding of mobi document + + @param[in] m MOBIData structure holding document data and metadata + @return MOBIEncoding text encoding (MOBI_UTF8 or MOBI_CP1252 + */ +MOBIEncoding mobi_get_encoding(const MOBIData *m) { + if (m && m->mh) { + if (m->mh->text_encoding) { + if (*m->mh->text_encoding == MOBI_UTF8) { + return MOBI_UTF8; + } + } + } + return MOBI_CP1252; +} + +/** @brief Check if document's text is cp1252 encoded + + @param[in] m MOBIData structure holding document data and metadata + @return True or false + */ +bool mobi_is_cp1252(const MOBIData *m) { + return (mobi_get_encoding(m) == MOBI_CP1252); +} + +/** + @brief strdup replacement + + Returned pointer must be freed by caller + + @param[in] s Input string + @return Duplicated string + */ +char * mobi_strdup(const char *s) { + char *p = malloc(strlen(s) + 1); + if (p) { strcpy(p, s); } + return p; +} + +#define MOBI_LANG_MAX 99 /**< number of entries in mobi_locale array */ +#define 
MOBI_REGION_MAX 21 /**< maximum number of entries in each language array */ + +/**< @brief Table of Mobipocket language-region codes + + Based on IANA language-subtag registry with some custom Mobipocket modifications. + http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry + */ +static const char *mobi_locale[MOBI_LANG_MAX][MOBI_REGION_MAX] = { + {"neutral"}, + { + "ar", /**< Arabic >*/ + "ar-sa", /**< Arabic (Saudi Arabia) >*/ + "ar", /**< Arabic (Unknown) */ + "ar-eg", /**< Arabic (Egypt) >*/ + "ar", /**< Arabic (Unknown) */ + "ar-dz", /**< Arabic (Algeria) >*/ + "ar-ma", /**< Arabic (Morocco) >*/ + "ar-tn", /**< Arabic (Tunisia) >*/ + "ar-om", /**< Arabic (Oman) >*/ + "ar-ye", /**< Arabic (Yemen) >*/ + "ar-sy", /**< Arabic (Syria) >*/ + "ar-jo", /**< Arabic (Jordan) >*/ + "ar-lb", /**< Arabic (Lebanon) >*/ + "ar-kw", /**< Arabic (Kuwait) >*/ + "ar-ae", /**< Arabic (UAE) >*/ + "ar-bh", /**< Arabic (Bahrain) >*/ + "ar-qa", /**< Arabic (Qatar) >*/ + }, + {"bg"}, /**< Bulgarian >*/ + {"ca"}, /**< Catalan >*/ + { + "zh", /**< Chinese >*/ + "zh-tw", /**< Chinese (Taiwan) >*/ + "zh-cn", /**< Chinese (PRC) >*/ + "zh-hk", /**< Chinese (Hong Kong) >*/ + "zh-sg", /**< Chinese (Singapore) >*/ + }, + {"cs"}, /**< Czech >*/ + {"da"}, /**< Danish >*/ + { + "de", /**< German >*/ + "de-de", /**< German (Germany) >*/ + "de-ch", /**< German (Switzerland) >*/ + "de-at", /**< German (Austria) >*/ + "de-lu", /**< German (Luxembourg) >*/ + "de-li", /**< German (Liechtenstein) >*/ + }, + {"el"}, /**< Greek (modern) >*/ + { + "en", /**< English >*/ + "en-us", /**< English (United States) >*/ + "en-gb", /**< English (United Kingdom) >*/ + "en-au", /**< English (Australia) >*/ + "en-ca", /**< English (Canada) >*/ + "en-nz", /**< English (New Zealand) >*/ + "en-ie", /**< English (Ireland) >*/ + "en-za", /**< English (South Africa) >*/ + "en-jm", /**< English (Jamaica) >*/ + "en", /**< English (Unknown) >*/ + "en-bz", /**< English (Belize) >*/ + "en-tt", /**< 
English (Trinidad) >*/ + "en-zw", /**< English (Zimbabwe) >*/ + "en-ph", /**< English (Philippines) >*/ + }, + { + "es", /**< Spanish >*/ + "es-es", /**< Spanish (Spain) >*/ + "es-mx", /**< Spanish (Mexico) >*/ + "es", /**< Spanish (Unknown) >*/ + "es-gt", /**< Spanish (Guatemala) >*/ + "es-cr", /**< Spanish (Costa Rica) >*/ + "es-pa", /**< Spanish (Panama) >*/ + "es-do", /**< Spanish (Dominican Republic) >*/ + "es-ve", /**< Spanish (Venezuela) >*/ + "es-co", /**< Spanish (Colombia) >*/ + "es-pe", /**< Spanish (Peru) >*/ + "es-ar", /**< Spanish (Argentina) >*/ + "es-ec", /**< Spanish (Ecuador) >*/ + "es-cl", /**< Spanish (Chile) >*/ + "es-uy", /**< Spanish (Uruguay) >*/ + "es-py", /**< Spanish (Paraguay) >*/ + "es-bo", /**< Spanish (Bolivia) >*/ + "es-sv", /**< Spanish (El Salvador) >*/ + "es-hn", /**< Spanish (Honduras) >*/ + "es-ni", /**< Spanish (Nicaragua) >*/ + "es-pr", /**< Spanish (Puerto Rico) >*/ + }, + {"fi"}, /**< Finnish >*/ + { + "fr", /**< French >*/ + "fr-fr", /**< French (France) >*/ + "fr-be", /**< French (Belgium) >*/ + "fr-ca", /**< French (Canada) >*/ + "fr-ch", /**< French (Switzerland) >*/ + "fr-lu", /**< French (Luxembourg) >*/ + "fr-mc", /**< French (Monaco) >*/ + }, + {"he"}, /**< Hebrew (also code iw) >*/ + {"hu"}, /**< Hungarian >*/ + {"is"}, /**< Icelandic >*/ + { + "it", /**< Italian >*/ + "it-it", /**< Italian (Italy) >*/ + "it-ch", /**< Italian (Switzerland) >*/ + }, + {"ja"}, /**< Japanese >*/ + {"ko"}, /**< Korean >*/ + { + "nl", /**< Dutch / Flemish >*/ + "nl-nl", /**< Dutch (Netherlands) >*/ + "nl-be", /**< Dutch (Belgium) >*/ + }, + {"no"}, /**< Norwegian >*/ + {"pl"}, /**< Polish >*/ + { + "pt", /**< Portuguese >*/ + "pt-br", /**< Portuguese (Brazil) >*/ + "pt-pt", /**< Portuguese (Portugal) >*/ + }, + {"rm"}, /**< Romansh >*/ + {"ro"}, /**< Romanian >*/ + {"ru"}, /**< Russian >*/ + {"hr"}, /**< Croatian >*/ + { + "sr", /**< Serbian >*/ + "sr", /**< Serbian (Unknown) >*/ + "sr", /**< Serbian (Unknown) >*/ + "sr", /**< Serbian 
(Serbia) >*/ + }, + {"sk"}, /**< Slovak >*/ + {"sq"}, /**< Albanian >*/ + { + "sv", /**< Swedish >*/ + "sv-se", /**< Swedish (Sweden) >*/ + "sv-fi", /**< Swedish (Finland) >*/ + }, + {"th"}, /**< Thai >*/ + {"tr"}, /**< Turkish >*/ + {"ur"}, /**< Urdu >*/ + {"id"}, /**< Indonesian >*/ + {"uk"}, /**< Ukrainian >*/ + {"be"}, /**< Belarusian >*/ + {"sl"}, /**< Slovenian >*/ + {"et"}, /**< Estonian >*/ + {"lv"}, /**< Latvian >*/ + {"lt"}, /**< Lithuanian >*/ + [41] = {"fa"}, /**< Farsi / Persian >*/ + {"vi"}, /**< Vietnamese >*/ + {"hy"}, /**< Armenian >*/ + {"az"}, /**< Azerbaijani >*/ + {"eu"}, /**< Basque >*/ + {"sb"}, /**< "Sorbian" >*/ + {"mk"}, /**< Macedonian >*/ + {"sx"}, /**< "Sutu" >*/ + {"ts"}, /**< Tsonga >*/ + {"tn"}, /**< Tswana >*/ + [52] = {"xh"}, /**< Xhosa >*/ + {"zu"}, /**< Zulu >*/ + {"af"}, /**< Afrikaans >*/ + {"ka"}, /**< Georgian >*/ + {"fo"}, /**< Faroese >*/ + {"hi"}, /**< Hindi >*/ + {"mt"}, /**< Maltese >*/ + {"sz"}, /**<"Sami (Lappish)" >*/ + {"ga"}, /**< Irish */ + [62] = {"ms"}, /**< Malay >*/ + {"kk"}, /**< Kazakh >*/ + [65] = {"sw"}, /**< Swahili >*/ + [67] = { + "uz", /**< Uzbek >*/ + "uz", /**< Uzbek (Unknown) >*/ + "uz-uz", /**< Uzbek (Uzbekistan) >*/ + }, + {"tt"}, /**< Tatar >*/ + {"bn"}, /**< Bengali >*/ + {"pa"}, /**< Punjabi >*/ + {"gu"}, /**< Gujarati >*/ + {"or"}, /**< Oriya >*/ + {"ta"}, /**< Tamil >*/ + {"te"}, /**< Telugu >*/ + {"kn"}, /**< Kannada >*/ + {"ml"}, /**< Malayalam >*/ + {"as"}, /**< Assamese (not accepted in kindlegen >*/ + {"mr"}, /**< Marathi >*/ + {"sa"}, /**< Sanskrit >*/ + [82] = { + "cy", /**< Welsh */ + "cy-gb" /**< Welsh (UK) */ + }, + { + "gl", /**< Galician */ + "gl-es" /**< Galician (Spain) */ + }, + [87] = {"x-kok"}, /**< Konkani (real language code is kok) >*/ + [97] = {"ne"}, /**< Nepali >*/ + {"fy"}, /**< Northern Frysian >*/ +}; + +/** + @brief Get pointer to locale tag for a given Mobipocket locale number + + Locale strings are based on IANA language-subtag registry with some custom Mobipocket 
modifications. + See mobi_locale array. + + @param[in] locale_number Mobipocket locale number (as stored in MOBI header) + @return Pointer to locale string in mobi_locale array + */ +const char * mobi_get_locale_string(const uint32_t locale_number) { + uint8_t lang_code = locale_number & 0xffu; + uint32_t region_code = (locale_number >> 8) / 4; + if (lang_code >= MOBI_LANG_MAX || region_code >= MOBI_REGION_MAX) { + return NULL; + } + const char *string = mobi_locale[lang_code][region_code]; + if (string == NULL || strlen(string) == 0 ) { + return NULL; + } + return string; +} + +/** + @brief Get Mobipocket locale number for a given string tag + + Locale strings are based on IANA language-subtag registry with some custom Mobipocket modifications. + See mobi_locale array. + + @param[in] locale_string Locale string tag + @return Mobipocket locale number + */ +size_t mobi_get_locale_number(const char *locale_string) { + if (locale_string == NULL || strlen(locale_string) < 2) { + return 0; + } + size_t lang_code = 0; + while (lang_code < MOBI_LANG_MAX) { + if (mobi_locale[lang_code][0] == NULL) { + lang_code++; + continue; + } + char lower_locale[strlen(locale_string) + 1]; + int i = 0; + while (locale_string[i]) { + lower_locale[i] = (char) tolower(locale_string[i]); + i++; + } + lower_locale[i] = '\0'; + if (strncmp(lower_locale, mobi_locale[lang_code][0], 2) == 0) { + size_t region_code = 0; + while (region_code < MOBI_REGION_MAX) { + if (strcmp(lower_locale, mobi_locale[lang_code][region_code]) == 0) { + return (region_code * 4) << 8 | lang_code; + } + region_code++; + } + return lang_code; + } + lang_code++; + } + return 0; +} + +/** + @brief Array of known file types, their extensions and mime-types. 
+ */ +const MOBIFileMeta mobi_file_meta[] = { + {T_HTML, "html", "application/xhtml+xml"}, + {T_CSS, "css", "text/css"}, + {T_SVG, "svg", "image/svg+xml"}, + {T_JPG, "jpg", "image/jpeg"}, + {T_GIF, "gif", "image/gif"}, + {T_PNG, "png", "image/png"}, + {T_BMP, "bmp", "image/bmp"}, + {T_OTF, "otf", "application/vnd.ms-opentype"}, + {T_TTF, "ttf", "application/x-font-truetype"}, + {T_MP3, "mp3", "audio/mpeg"}, + {T_MPG, "mpg", "video/mpeg"}, + {T_PDF, "pdf", "application/pdf"}, + {T_OPF, "opf", "application/oebps-package+xml"}, + {T_NCX, "ncx", "application/x-dtbncx+xml"}, + /* termination struct */ + {T_UNKNOWN, "dat", "application/unknown"} +}; + +/** + @brief Get MOBIFileMeta tag structure by MOBIFiletype type + + @param[in] type MOBIFiletype type + @return MOBIExthMeta structure for given type, .type = T_UNKNOWN on failure + */ +MOBIFileMeta mobi_get_filemeta_by_type(const MOBIFiletype type) { + size_t i = 0; + while (mobi_file_meta[i].type != T_UNKNOWN) { + if (mobi_file_meta[i].type == type) { + return mobi_file_meta[i]; + } + i++; + } + return mobi_file_meta[i]; +} + +/** + @brief Get ebook full name stored in Record 0 at offset given in MOBI header + + @param[in] m MOBIData structure with loaded data + @param[in,out] fullname Memory area to be filled with zero terminated full name string + @param[in] len Length of memory area allocated for the string + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_fullname(const MOBIData *m, char *fullname, const size_t len) { + if (fullname == NULL || len == 0) { + return MOBI_PARAM_ERR; + } fullname[0] = '\0'; if (m == NULL) { - printf("Mobi structure not initialized\n"); - return; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } - MOBIPdbRecord *record0 = mobi_get_record_by_seqnumber(m, 0); - if (m->mh == NULL || m->mh->full_name_offset == NULL || record0 == NULL) { - return; + const size_t offset = mobi_get_kf8offset(m); + MOBIPdbRecord *record0 = 
mobi_get_record_by_seqnumber(m, offset); + if (m->mh == NULL || + m->mh->full_name_offset == NULL || + m->mh->full_name_length == NULL || + record0 == NULL) { + return MOBI_INIT_FAILED; } - strncpy(fullname, record0->data + *m->mh->full_name_offset, len); + size_t size = min(len, *m->mh->full_name_length); + memcpy(fullname, record0->data + *m->mh->full_name_offset, size); + fullname[size] = '\0'; + return MOBI_SUCCESS; } -MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid) { - MOBIPdbRecord *curr; +/** + @brief Get palm database record with given unique id + + @param[in] m MOBIData structure with loaded data + @param[in] uid Unique id + @return Pointer to MOBIPdbRecord record structure, NULL on failure + */ +MOBIPdbRecord * mobi_get_record_by_uid(const MOBIData *m, const size_t uid) { if (m == NULL) { - printf("Mobi structure not initialized\n"); + debug_print("%s", "Mobi structure not initialized\n"); return NULL; } if (m->rec == NULL) { return NULL; } - curr = m->rec; + MOBIPdbRecord *curr = m->rec; while (curr != NULL) { if (curr->uid == uid) { return curr; @@ -40,17 +519,125 @@ MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid) { return NULL; } -MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t num) { - MOBIPdbRecord *curr; +/** + @brief Get rawml->markup MOBIPart part by uid + + @param[in] rawml MOBIRawml structure with loaded data + @param[in] uid Unique id + @return Pointer to MOBIPart structure, NULL on failure + */ +MOBIPart * mobi_get_part_by_uid(const MOBIRawml *rawml, const size_t uid) { + if (rawml == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return NULL; + } + if (rawml->markup == NULL) { + return NULL; + } + MOBIPart *part = rawml->markup; + while (part != NULL) { + if (part->uid == uid) { + return part; + } + part = part->next; + } + return NULL; +} + +/** + @brief Get rawml->flow MOBIPart part by uid + + @param[in] rawml MOBIRawml structure with loaded data + @param[in] uid Unique id 
+ @return Pointer to MOBIPart structure, NULL on failure + */ +MOBIPart * mobi_get_flow_by_uid(const MOBIRawml *rawml, const size_t uid) { + if (rawml == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return NULL; + } + if (rawml->flow == NULL) { + return NULL; + } + MOBIPart *part = rawml->flow; + while (part != NULL) { + if (part->uid == uid) { + return part; + } + part = part->next; + } + return NULL; +} + +/** + @brief Get MOBIPart resource record with given unique id + + @param[in] rawml MOBIRawml structure with loaded data + @param[in] uid Unique id + @return Pointer to MOBIPart resource structure, NULL on failure + */ +MOBIPart * mobi_get_resource_by_uid(const MOBIRawml *rawml, const size_t uid) { + if (rawml == NULL) { + debug_print("%s", "Rawml structure not initialized\n"); + return NULL; + } + if (rawml->resources == NULL) { + debug_print("%s", "Rawml structure not initialized\n"); + return NULL; + } + MOBIPart *curr = rawml->resources; + while (curr != NULL) { + if (curr->uid == uid) { + return curr; + } + curr = curr->next; + } + return NULL; +} + +/** + @brief Get MOBIFiletype type of MOBIPart resource record with given unique id + + @param[in] rawml MOBIRawml structure with loaded data + @param[in] uid Unique id + @return Pointer to MOBIPart resource structure, NULL on failure + */ +MOBIFiletype mobi_get_resourcetype_by_uid(const MOBIRawml *rawml, const size_t uid) { + if (rawml == NULL) { + debug_print("%s", "Rawml structure not initialized\n"); + return T_UNKNOWN; + } + if (rawml->resources == NULL) { + debug_print("%s", "Rawml structure not initialized\n"); + return T_UNKNOWN; + } + MOBIPart *curr = rawml->resources; + while (curr != NULL) { + if (curr->uid == uid) { + return curr->type; + } + curr = curr->next; + } + return T_UNKNOWN; +} + +/** + @brief Get palm database record with given sequential number (first record has number 0) + + @param[in] m MOBIData structure with loaded data + @param[in] num Sequential number + 
@return Pointer to MOBIPdbRecord record structure, NULL on failure + */ +MOBIPdbRecord * mobi_get_record_by_seqnumber(const MOBIData *m, const size_t num) { if (m == NULL) { - printf("Mobi structure not initialized\n"); + debug_print("%s", "Mobi structure not initialized\n"); return NULL; } if (m->rec == NULL) { return NULL; } - int i = 0; - curr = m->rec; + MOBIPdbRecord *curr = m->rec; + size_t i = 0; while (curr != NULL) { if (i++ == num) { return curr; @@ -60,18 +647,24 @@ MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t num) { return NULL; } -int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num) { - MOBIPdbRecord *curr, *prev; +/** + @brief Delete palm database record with given sequential number from MOBIData structure + + @param[in,out] m MOBIData structure with loaded data + @param[in] num Sequential number + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_delete_record_by_seqnumber(MOBIData *m, const size_t num) { if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; } if (m->rec == NULL) { - return MOBI_ERROR; + return MOBI_INIT_FAILED; } - int i = 0; - curr = m->rec; - prev = NULL; + size_t i = 0; + MOBIPdbRecord *curr = m->rec; + MOBIPdbRecord *prev = NULL; while (curr != NULL) { if (i++ == num) { if (prev == NULL) { @@ -91,18 +684,24 @@ int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num) { return MOBI_SUCCESS; } -MOBIExtHeader * mobi_get_exthtag_by_uid(MOBIData *m, size_t uid) { - MOBIExtHeader *curr; +/** + @brief Get EXTH record with given MOBIExthTag tag + + @param[in] m MOBIData structure with loaded data + @param[in] tag MOBIExthTag EXTH record tag + @return Pointer to MOBIExthHeader record structure + */ +MOBIExthHeader * mobi_get_exthrecord_by_tag(const MOBIData *m, const MOBIExthTag tag) { if (m == NULL) { - printf("Mobi structure not initialized\n"); + debug_print("%s", 
"Mobi structure not initialized\n"); return NULL; } if (m->eh == NULL) { return NULL; } - curr = m->eh; + MOBIExthHeader *curr = m->eh; while (curr != NULL) { - if (curr->uid == uid) { + if (curr->tag == tag) { return curr; } curr = curr->next; @@ -110,211 +709,1051 @@ MOBIExtHeader * mobi_get_exthtag_by_uid(MOBIData *m, size_t uid) { return NULL; } -size_t sizeof_trailing_entry(MOBIPdbRecord *record, size_t psize) { - size_t bitpos = 0; - size_t result = 0; - uint8_t v; - while (1) { - v = *(record->data + psize - 1); - result |= (v & 0x7F) << bitpos; - bitpos += 7; - psize -= 1; - if ((v & 0x80) != 0 || (bitpos >= 28) || (psize == 0)) { - return result; +/** + @brief Array of known EXTH tags. + Name strings shamelessly copied from KindleUnpack + */ +const MOBIExthMeta mobi_exth_tags[] = { + /* numeric */ + {EXTH_SAMPLE, EXTH_NUMERIC, "Sample"}, + {EXTH_STARTREADING, EXTH_NUMERIC, "Start offset"}, + {EXTH_KF8BOUNDARY, EXTH_NUMERIC, "K8 boundary offset"}, + {EXTH_COUNTRESOURCES, EXTH_NUMERIC, "K8 count of resources, fonts, images"}, + {EXTH_RESCOFFSET, EXTH_NUMERIC, "RESC offset"}, + {EXTH_COVEROFFSET, EXTH_NUMERIC, "Cover offset"}, + {EXTH_THUMBOFFSET, EXTH_NUMERIC, "Thumbnail offset"}, + {EXTH_HASFAKECOVER, EXTH_NUMERIC, "Has fake cover"}, + {EXTH_CREATORSOFT, EXTH_NUMERIC, "Creator software"}, + {EXTH_CREATORMAJOR, EXTH_NUMERIC, "Creator major version"}, + {EXTH_CREATORMINOR, EXTH_NUMERIC, "Creator minor version"}, + {EXTH_CREATORBUILD, EXTH_NUMERIC, "Creator build number"}, + {EXTH_CLIPPINGLIMIT, EXTH_NUMERIC, "Clipping limit"}, + {EXTH_PUBLISHERLIMIT, EXTH_NUMERIC, "Publisher limit"}, + {EXTH_TTSDISABLE, EXTH_NUMERIC, "Text to Speech disabled"}, + {EXTH_RENTAL, EXTH_NUMERIC, "Rental indicator"}, + /* strings */ + {EXTH_DRMSERVER, EXTH_STRING, "Drm server id"}, + {EXTH_DRMCOMMERCE, EXTH_STRING, "Drm commerce id"}, + {EXTH_DRMEBOOKBASE, EXTH_STRING, "Drm Ebookbase book id"}, + {EXTH_TITLE, EXTH_STRING, "Title"}, + {EXTH_AUTHOR, EXTH_STRING, "Creator"}, + 
{EXTH_PUBLISHER, EXTH_STRING, "Publisher"}, + {EXTH_IMPRINT, EXTH_STRING, "Imprint"}, + {EXTH_DESCRIPTION, EXTH_STRING, "Description"}, + {EXTH_ISBN, EXTH_STRING, "ISBN"}, + {EXTH_SUBJECT, EXTH_STRING, "Subject"}, + {EXTH_PUBLISHINGDATE, EXTH_STRING, "Published"}, + {EXTH_REVIEW, EXTH_STRING, "Review"}, + {EXTH_CONTRIBUTOR, EXTH_STRING, "Contributor"}, + {EXTH_RIGHTS, EXTH_STRING, "Rights"}, + {EXTH_SUBJECTCODE, EXTH_STRING, "Subject code"}, + {EXTH_TYPE, EXTH_STRING, "Type"}, + {EXTH_SOURCE, EXTH_STRING, "Source"}, + {EXTH_ASIN, EXTH_STRING, "ASIN"}, + {EXTH_VERSION, EXTH_STRING, "Version number"}, + {EXTH_ADULT, EXTH_STRING, "Adult"}, + {EXTH_PRICE, EXTH_STRING, "Price"}, + {EXTH_CURRENCY, EXTH_STRING, "Currency"}, + {EXTH_FIXEDLAYOUT, EXTH_STRING, "Fixed layout"}, + {EXTH_BOOKTYPE, EXTH_STRING, "Book type"}, + {EXTH_ORIENTATIONLOCK, EXTH_STRING, "Orientation lock"}, + {EXTH_ORIGRESOLUTION, EXTH_STRING, "Original resolution"}, + {EXTH_ZEROGUTTER, EXTH_STRING, "Zero gutter"}, + {EXTH_ZEROMARGIN, EXTH_STRING, "Zero margin"}, + {EXTH_KF8COVERURI, EXTH_STRING, "K8 masthead/cover image"}, + {EXTH_REGIONMAGNI, EXTH_STRING, "Region magnification"}, + {EXTH_DICTNAME, EXTH_STRING, "Dictionary short name"}, + {EXTH_WATERMARK, EXTH_STRING, "Watermark"}, + {EXTH_DOCTYPE, EXTH_STRING, "Document type"}, + {EXTH_LASTUPDATE, EXTH_STRING, "Last update time"}, + {EXTH_UPDATEDTITLE, EXTH_STRING, "Updated title"}, + {EXTH_ASIN504, EXTH_STRING, "ASIN (504)"}, + {EXTH_TITLEFILEAS, EXTH_STRING, "Title file as"}, + {EXTH_CREATORFILEAS, EXTH_STRING, "Creator file as"}, + {EXTH_PUBLISHERFILEAS, EXTH_STRING, "Publisher file as"}, + {EXTH_LANGUAGE, EXTH_STRING, "Language"}, + {EXTH_ALIGNMENT, EXTH_STRING, "Primary writing mode"}, + {EXTH_PAGEDIR, EXTH_STRING, "Page progression direction"}, + {EXTH_OVERRIDEFONTS, EXTH_STRING, "Override Kindle fonts"}, + {EXTH_SORCEDESC, EXTH_STRING, "Original source description"}, + {EXTH_UNK534, EXTH_STRING, "Unknown (534)"}, + {EXTH_CREATORBUILDREV, 
EXTH_STRING, "Kindlegen BuildRev number"}, + /* binary */ + {EXTH_TAMPERKEYS, EXTH_BINARY, "Tamper proof keys"}, + {EXTH_FONTSIGNATURE, EXTH_BINARY, "Font signature"}, + {EXTH_UNK403, EXTH_BINARY, "Unknown (403)"}, + {EXTH_UNK405, EXTH_BINARY, "Unknown (405)"}, + {EXTH_UNK407, EXTH_BINARY, "Unknown (407)"}, + {EXTH_UNK450, EXTH_BINARY, "Unknown (450)"}, + {EXTH_UNK451, EXTH_BINARY, "Unknown (451)"}, + {EXTH_UNK452, EXTH_BINARY, "Unknown (452)"}, + {EXTH_UNK453, EXTH_BINARY, "Unknown (453)"}, + /* end */ + {0, 0, NULL}, +}; + +/** + @brief Get MOBIExthMeta tag structure by MOBIExthTag tag id + + @param[in] tag Tag id + @return MOBIExthMeta structure for given tag id, zeroed structure on failure + */ +MOBIExthMeta mobi_get_exthtagmeta_by_tag(const MOBIExthTag tag) { + size_t i = 0; + while (mobi_exth_tags[i].tag > 0) { + if (mobi_exth_tags[i].tag == tag) { + return mobi_exth_tags[i]; } + i++; } + return (MOBIExthMeta) {0, 0, NULL}; } -size_t mobi_get_record_extrasize(MOBIPdbRecord *record, uint16_t flags) { - size_t num, size; - num = 0; - size = record->size; - int mb_flag = flags & 1; - flags >>= 1; - while (flags) { - if (flags & 1) { - num += sizeof_trailing_entry(record, size - num); - } - flags >>= 1; - } - if (mb_flag){ - num += (*(record->data + size - num - 1) & 0x3) + 1; - } - return num; +/** + @brief Decode big-endian value stored in EXTH record + + Only for EXTH records storing numeric values + + @param[in] data Memory area storing EXTH record data + @param[in] size Size of EXTH record data + @return 32-bit value + */ +uint32_t mobi_decode_exthvalue(const unsigned char *data, const size_t size) { + /* FIXME: EXTH numeric data is max 32-bit? 
*/ + uint32_t val = 0; + size_t i = min(size, 4); + while (i--) { + val |= (uint32_t) *data++ << (i * 8); + } + return val; } -/*size_t mobi_get_record_extrasize(MOBIPdbRecord *record, uint16_t flags) { - size_t extra_size = 0, offset = 1; - uint8_t b; - for (int bit = 15; bit > 0; bit--) { - if (flags & (1 << bit)) { - // bit is set - int bit_count = 0; - do { - // read at most 4 * 7-bit ints, bit 7 set stops search - b = *(record->data + record->size - offset); - extra_size |= (b & 0x7f) << bit_count; - bit_count += 7; - offset++; - } while (!(b & 0x80) && (bit_count < 28) && offset < record->size); - offset += extra_size - 1; - } - }; - // check bit 0 - if (flags & 1) { - if (offset < record->size) { - b = *(record->data + record->size - offset); - // two first bits hold size - extra_size += (b & 0x3) + 1; - } - - } - return extra_size; -}*/ - -// wrapper for mobi_get_rawml and mobi_dump_rawml -int mobi_decompress_content(MOBIData *m, char *text, FILE *file, size_t len, int dump) { - MOBIPdbRecord *curr; - size_t text_rec_index; - size_t offset = 0; - size_t text_length = 0; - if (m == NULL) { - printf("Mobi structure not initialized\n"); - return MOBI_ERROR; +/** + @brief Decode string stored in EXTH record + + Only for EXTH records storing string values + + @param[in] m MOBIData structure loaded with MOBI data + @param[in] data Memory area storing EXTH record data + @param[in] size Size of EXTH record data + @return String from EXTH record in utf-8 encoding + */ +char * mobi_decode_exthstring(const MOBIData *m, const unsigned char *data, const size_t size) { + if (!m || !data) { + return NULL; } - // check if we want to parse kf8 part of joint file - if (m->use_kf8 && m->next != NULL) { - int kf8_offset = mobi_get_kf8boundary(m->next); - if (kf8_offset >= 0) { - // kf8 boundary + 1 * record0 - offset = kf8_offset + 1; + size_t out_length = 3 * size + 1; + size_t in_length = size; + char string[out_length]; + if (mobi_is_cp1252(m)) { + MOBI_RET ret = 
mobi_cp1252_to_utf8(string, (const char *) data, &out_length, in_length); + if (ret != MOBI_SUCCESS) { + return NULL; } + } else { + memcpy(string, data, size); + out_length = size; + } + string[out_length] = '\0'; + char *exth_string = strdup(string); + return exth_string; +} + +/** + @brief Convert time values from palmdoc header to time tm struct + + Older files set time in mac format. Newer ones in unix time. + + @param[in] pdb_time Time value from PDB header + @return Time structure struct tm of time.h + */ +struct tm * mobi_pdbtime_to_time(const long pdb_time) { + time_t time = pdb_time; + const uint32_t mactime_flag = (uint32_t) (1 << 31); + if (time & mactime_flag) { + printf("MAC TIME\n"); + time += EPOCH_MAC_DIFF; + } + return localtime(&time); +} + +/** + @brief Lookup table for number of bits set in a single byte + */ +static const char setbits[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +/** + @brief Get number of bits set in a given byte + + @param[in] byte A byte + @return Number of bits set + */ +int mobi_bitcount(const uint8_t byte) { + return setbits[byte]; +} + +/** + @brief Decompress text record (internal). + + Internal function for mobi_get_rawml and mobi_dump_rawml. 
+ Decompressed output is stored either in a file or in a text string + + @param[in] m MOBIData structure loaded with MOBI data + @param[in,out] text Memory area to be filled with decompressed output + @param[in,out] file If not NULL output is written to the file, otherwise to text string + @param[in,out] len Length of the memory allocated for the text string, on return set to decompressed text length + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +static MOBI_RET mobi_decompress_content(const MOBIData *m, char *text, FILE *file, size_t *len) { + if (mobi_is_encrypted(m)) { + debug_print("%s", "Document is encrypted\n"); + return MOBI_FILE_ENCRYPTED; + } + int dump = false; + if (file != NULL) { + dump = true; } + if (m == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_INIT_FAILED; + } + const size_t offset = mobi_get_kf8offset(m); if (m->rh == NULL || m->rh->text_record_count == 0) { - printf("Text records not found in MOBI header\n"); - return MOBI_ERROR; + debug_print("%s", "Text records not found in MOBI header\n"); + return MOBI_DATA_CORRUPT; } - text_rec_index = 1 + offset; + const size_t text_rec_index = 1 + offset; size_t text_rec_count = m->rh->text_record_count; - uint16_t compression_type = m->rh->compression_type; - // check for extra data at the end of text files - uint16_t extra_flags = 0, extra_size = 0; + const uint16_t compression_type = m->rh->compression_type; + /* check for extra data at the end of text files */ + uint16_t extra_flags = 0; if (m->mh && m->mh->extra_flags) { extra_flags = *m->mh->extra_flags; } - // get first text record - curr = mobi_get_record_by_seqnumber(m, text_rec_index); - - size_t d_size, record_size; - char decompressed[2*RECORD0_RECORD_SIZE_MAX + 32]; // FIXME debug + /* get first text record */ + const MOBIPdbRecord *curr = mobi_get_record_by_seqnumber(m, text_rec_index); MOBIHuffCdic *huffcdic = NULL; if (compression_type == RECORD0_HUFF_COMPRESSION) { - // load huff/cdic 
tables - huffcdic = mobi_init_huffcdic(m); - } - // get following CDIC records - while (text_rec_count--) { - if (curr->uid == 17622) { // FIXME debug - ;; + /* load huff/cdic tables */ + huffcdic = mobi_init_huffcdic(); + if (huffcdic == NULL) { + return MOBI_MALLOC_FAILED; + } + MOBI_RET ret = mobi_parse_huffdic(m, huffcdic); + if (ret != MOBI_SUCCESS) { + free(huffcdic); + return ret; } + } + /* get following CDIC records */ + size_t text_length = 0; + while (text_rec_count-- && curr) { + size_t extra_size = 0; if (extra_flags) { extra_size = mobi_get_record_extrasize(curr, extra_flags); + if (extra_size == MOBI_NOTSET || extra_size >= curr->size) { + return MOBI_DATA_CORRUPT; + } } - record_size = curr->size - extra_size; + const size_t record_size = curr->size - extra_size; + unsigned char decompressed[RECORD0_TEXT_SIZE_MAX]; + /* FIXME: RECORD0_TEXT_SIZE_MAX should be enough */ + size_t decompressed_size = RECORD0_TEXT_SIZE_MAX; switch (compression_type) { case RECORD0_NO_COMPRESSION: - // no compression - strncat(decompressed, curr->data, curr->size); - d_size = curr->size; + /* no compression */ + memcpy(decompressed, curr->data, curr->size); + decompressed_size = curr->size; break; case RECORD0_PALMDOC_COMPRESSION: - // palmdoc lz77 compression - d_size = mobi_decompress_lz77(decompressed, curr->data, record_size); + /* palmdoc lz77 compression */ + mobi_decompress_lz77(decompressed, curr->data, &decompressed_size, record_size); break; case RECORD0_HUFF_COMPRESSION: - // mobi huffman compression - d_size = mobi_decompress_huffman(decompressed, curr->data, record_size, huffcdic, 0); - if (d_size > RECORD0_RECORD_SIZE_MAX) { - d_size = RECORD0_RECORD_SIZE_MAX; - } + /* mobi huffman compression */ + mobi_decompress_huffman(decompressed, curr->data, &decompressed_size, record_size, huffcdic); break; default: - printf("Unknown compression type\n"); - return MOBI_ERROR; + debug_print("%s", "Unknown compression type\n"); + return MOBI_DATA_CORRUPT; } curr = 
curr->next; - text_length += d_size; - if (dump) { - fwrite(decompressed, 1, d_size, file); + fwrite(decompressed, 1, decompressed_size, file); } else { - if (text_length > len) { - printf("Text buffer too small\n"); - // free huff/cdic tables + if (text_length > *len) { + debug_print("%s", "Text buffer too small\n"); + /* free huff/cdic tables */ if (compression_type == RECORD0_HUFF_COMPRESSION) { mobi_free_huffcdic(huffcdic); } - return MOBI_ERROR; + return MOBI_PARAM_ERR; } - strncat(text, decompressed, d_size); + memcpy(text + text_length, decompressed, decompressed_size); + text_length += decompressed_size; + text[text_length] = '\0'; } + } - // free huff/cdic tables + /* free huff/cdic tables */ if (compression_type == RECORD0_HUFF_COMPRESSION) { mobi_free_huffcdic(huffcdic); } + if (len) { + *len = text_length; + } return MOBI_SUCCESS; } -// copy raw text to text buffer -int mobi_get_rawml(MOBIData *m, char *text, size_t len) { - if (m->rh->text_length > len) { - printf("Text buffer smaller then text size declared in record0 header\n"); - return MOBI_ERROR; +/** + @brief Decompress text to a text buffer. + + @param[in] m MOBIData structure loaded with MOBI data + @param[in,out] text Memory area to be filled with decompressed output + @param[in,out] len Length of the memory allocated for the text string, on return will be set to decompressed text length + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_get_rawml(const MOBIData *m, char *text, size_t *len) { + if (m->rh->text_length > *len) { + debug_print("%s", "Text buffer smaller then text size declared in record0 header\n"); + return MOBI_PARAM_ERR; } text[0] = '\0'; - int ret = mobi_decompress_content(m, text, NULL, len, 0); - return ret; + return mobi_decompress_content(m, text, NULL, len); +} + +/** + @brief Decompress text record to an open file descriptor. + + Internal function for mobi_get_rawml and mobi_dump_rawml. 
+ Decompressed output is stored either in a file or in a text string + + @param[in] m MOBIData structure loaded with MOBI data + @param[in,out] file File descriptor + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_dump_rawml(const MOBIData *m, FILE *file) { + if (file == NULL) { + debug_print("%s", "File descriptor is NULL\n"); + return MOBI_FILE_NOT_FOUND; + } + return mobi_decompress_content(m, NULL, file, NULL); +} + +/** + @brief Check if MOBI header is loaded / present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_mobiheader(const MOBIData *m) { + if (m == NULL || m->mh == NULL) { + return false; + } + return true; +} + +/** + @brief Check if skeleton INDX is present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_skel_indx(const MOBIData *m) { + if (!mobi_exists_mobiheader(m)) { + return false; + } + if (m->mh->skeleton_index == NULL || *m->mh->skeleton_index == MOBI_NOTSET) { + debug_print("%s", "SKEL INDX record not found\n"); + return false; + } + return true; +} + +/** + @brief Check if FDST record is present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_fdst(const MOBIData *m) { + if (!mobi_exists_mobiheader(m)) { + return false; + } + if (mobi_get_fileversion(m) >= 8) { + if (m->mh->fdst_index && *m->mh->fdst_index != MOBI_NOTSET) { + return true; + } + } else { + if (m->mh->fdst_section_count && *m->mh->fdst_section_count > 1) { + return true; + } + } + debug_print("%s", "FDST record not found\n"); + return false; +} + +/** + @brief Get sequential number of FDST record + + @param[in] m MOBIData structure loaded with MOBI data + @return Record number on success, MOBI_NOTSET otherwise + */ +size_t mobi_get_fdst_record_number(const MOBIData *m) 
{ + const size_t offset = mobi_get_kf8offset(m); + if (m->mh->fdst_index && *m->mh->fdst_index != MOBI_NOTSET) { + if (m->mh->fdst_section_count && *m->mh->fdst_section_count > 1) { + return *m->mh->fdst_index + offset; + } + } + if (m->mh->fdst_section_count && *m->mh->fdst_section_count > 1) { + /* FIXME: if KF7, is it safe to asume last_text_index has fdst index */ + return *m->mh->last_text_index; + } + return MOBI_NOTSET; +} + +/** + @brief Check if fragments INDX is present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_frag_indx(const MOBIData *m) { + if (!mobi_exists_mobiheader(m)) { + return false; + } + if (m->mh->fragment_index == NULL || *m->mh->fragment_index == MOBI_NOTSET) { + debug_print("%s", "Fragments INDX not found\n"); + return false; + } + return true; +} + +/** + @brief Check if guide INDX is present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_guide_indx(const MOBIData *m) { + if (!mobi_exists_mobiheader(m)) { + return false; + } + if (m->mh->guide_index == NULL || *m->mh->guide_index == MOBI_NOTSET) { + debug_print("%s", "Guide INDX not found\n"); + return false; + } + return true; +} + +/** + @brief Check if ncx INDX is present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_ncx(const MOBIData *m) { + if (!mobi_exists_mobiheader(m)) { + return false; + } + if (m->mh->ncx_index == NULL || *m->mh->ncx_index == MOBI_NOTSET) { + debug_print("%s", "NCX INDX not found\n"); + return false; + } + return true; +} + +/** + @brief Check if orth INDX is present in the loaded file + + @param[in] m MOBIData structure loaded with MOBI data + @return true on success, false otherwise + */ +bool mobi_exists_orth(const MOBIData *m) { + if (!mobi_exists_mobiheader(m)) { + 
return false; + } + if (m->mh->orth_index == NULL || *m->mh->orth_index == MOBI_NOTSET) { + debug_print("%s", "ORTH INDX not found\n"); + return false; + } + return true; +} + +/** + @brief Get file type of given part with number [part_number] + + @param[in] rawml MOBIRawml parsed records structure + @param[in] part_number Sequential number of the part within rawml structure + @return MOBIFiletype file type + */ +MOBIFiletype mobi_determine_flowpart_type(const MOBIRawml *rawml, const size_t part_number) { + if (part_number == 0 || rawml->version == MOBI_NOTSET || rawml->version < 8) { + return T_HTML; + } + char target[24]; + sprintf(target, "\"kindle:flow:%04zu?mime=", part_number); + unsigned char *data_start = rawml->flow->data; + unsigned char *data_end = data_start + rawml->flow->size; + MOBIResult result; + MOBI_RET ret = mobi_search_markup(&result, data_start, data_end, T_HTML, target); + if (ret == MOBI_SUCCESS && result.start) { + if (strstr(result.value, "text/css")) { + return T_CSS; + } else if (strstr(result.value, "image/svg+xml")) { + return T_SVG; + } + } + return T_UNKNOWN; } -// dump raw text records to open file descriptor -int mobi_dump_rawml(MOBIData *m, FILE *file) { - int ret = mobi_decompress_content(m, NULL, file, 0, 1); - return ret; +/** + @brief Get font type of given font resource + + @param[in] font_data Font resource data + @return MOBIFiletype file type + */ +MOBIFiletype mobi_determine_font_type(const unsigned char *font_data) { + const char otf_magic[] = "OTTO"; + const char ttf_magic[] = "\0\1\0\0"; + const char ttf2_magic[] = "true"; + + if (memcmp(font_data, otf_magic, 4) == 0) { + return T_OTF; + } else if (memcmp(font_data, ttf_magic, 4) == 0) { + return T_TTF; + } else if (memcmp(font_data, ttf2_magic, 4) == 0) { + return T_TTF; + } + return T_UNKNOWN; } -// return kf8 boundary record sequential number or -1 if no such record -int mobi_get_kf8boundary(MOBIData *m) { - MOBIExtHeader *exth_tag; - MOBIPdbRecord *record; - 
uint32_t rec_number; +/** + @brief Replace part data with decoded audio data + + @param[in,out] part MOBIPart structure containing font resource, decoded part type will be set in the structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_add_audio_resource(MOBIPart *part) { + unsigned char *data = NULL; + size_t size = 0; + MOBI_RET ret = mobi_decode_audio_resource(&data, &size, part); + if (ret != MOBI_SUCCESS) { + return ret; + } + part->data = data; + part->size = size; + /* FIXME: the only possible audio type is mp3 */ + part->type = T_MP3; + ; + return MOBI_SUCCESS; +} + +/** + @brief Decode audio resource + + @param[in,out] decoded_resource Pointer to data offset in mobipocket record. + @param[in,out] decoded_size Decoded resource data size + @param[in,out] part MOBIPart structure containing resource, decoded part type will be set in the structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_decode_audio_resource(unsigned char **decoded_resource, size_t *decoded_size, MOBIPart *part) { + if (part->size < MEDIA_HEADER_LEN) { + debug_print("Audio resource record too short (%zu)\n", part->size); + return MOBI_DATA_CORRUPT; + } + MOBIBuffer *buf = buffer_init_null(part->size); + buf->data = part->data; + char magic[5]; + buffer_getstring(magic, buf, 4); + if (strncmp(magic, AUDI_MAGIC, 4) != 0) { + debug_print("Wrong magic for audio resource: %s\n", magic); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + uint32_t offset = buffer_get32(buf); + buf->offset = offset; + *decoded_size = buf->maxlen - buf->offset; + *decoded_resource = buf->data; + buffer_free_null(buf); + return MOBI_SUCCESS; +} + +/** + @brief Replace part data with decoded video data + + @param[in,out] part MOBIPart structure containing font resource, decoded part type will be set in the structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_add_video_resource(MOBIPart *part) { + unsigned char 
*data = NULL; + size_t size = 0; + MOBI_RET ret = mobi_decode_video_resource(&data, &size, part); + if (ret != MOBI_SUCCESS) { + return ret; + } + part->data = data; + part->size = size; + part->type = T_MPG; /* FIXME: other types? */ +; + return MOBI_SUCCESS; +} + +/** + @brief Decode video resource + + @param[in,out] decoded_resource Pointer to data offset in mobipocket record. + @param[in,out] decoded_size Decoded resource data size + @param[in,out] part MOBIPart structure containing resource, decoded part type will be set in the structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_decode_video_resource(unsigned char **decoded_resource, size_t *decoded_size, MOBIPart *part) { + if (part->size < MEDIA_HEADER_LEN) { + debug_print("Video resource record too short (%zu)\n", part->size); + return MOBI_DATA_CORRUPT; + } + MOBIBuffer *buf = buffer_init_null(part->size); + buf->data = part->data; + char magic[5]; + buffer_getstring(magic, buf, 4); + if (strncmp(magic, VIDE_MAGIC, 4) != 0) { + debug_print("Wrong magic for audio resource: %s\n", magic); + buffer_free_null(buf); + return MOBI_DATA_CORRUPT; + } + uint32_t offset = buffer_get32(buf); + /* offset is always(?) 
12, next four bytes are unknown */ + buf->offset = offset; + *decoded_size = buf->maxlen - buf->offset; + *decoded_resource = buf->data; + buffer_free_null(buf); + return MOBI_SUCCESS; +} + +/** + @brief Replace part data with decoded font data + + @param[in,out] part MOBIPart structure containing font resource, decoded part type will be set in the structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_add_font_resource(MOBIPart *part) { + unsigned char *data = NULL; + size_t size = 0; + MOBI_RET ret = mobi_decode_font_resource(&data, &size, part); + if (ret != MOBI_SUCCESS) { + return ret; + } + part->data = data; + part->size = size; + part->type = mobi_determine_font_type(data); + return MOBI_SUCCESS; +} + +/** + @brief Deobfuscator and decompressor for font resources + + @param[in,out] decoded_font Pointer to memory to write to. Will be allocated. Must be freed by caller + @param[in,out] decoded_size Decoded font data size + @param[in,out] part MOBIPart structure containing font resource, decoded part type will be set in the structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_decode_font_resource(unsigned char **decoded_font, size_t *decoded_size, MOBIPart *part) { + if (part->size < FONT_HEADER_LEN) { + debug_print("Font resource record too short (%zu)\n", part->size); + return MOBI_DATA_CORRUPT; + } + MOBIBuffer *buf = buffer_init(part->size); + memcpy(buf->data, part->data, part->size); + struct header { + char magic[5]; + uint32_t decoded_size; + uint32_t flags; + uint32_t data_offset; + uint32_t xor_key_len; + uint32_t xor_data_off; + }; + struct header h; + buffer_getstring(h.magic, buf, 4); + if (strncmp(h.magic, FONT_MAGIC, 4) != 0) { + debug_print("Wrong magic for font resource: %s\n", h.magic); + buffer_free(buf); + return MOBI_DATA_CORRUPT; + } + h.decoded_size = buffer_get32(buf); + h.flags = buffer_get32(buf); + h.data_offset = buffer_get32(buf); + h.xor_key_len = buffer_get32(buf); 
+ h.xor_data_off = buffer_get32(buf); + const uint32_t zlib_flag = 1; /* bit 0 */ + const uint32_t xor_flag = 2; /* bit 1 */ + if (h.flags & xor_flag) { + /* deobfuscate */ + buf->offset = h.data_offset; + const unsigned char *xor_key = buf->data + h.xor_data_off; + size_t i = 0; + /* only xor first 1040 bytes */ + while (buf->offset < buf->maxlen && i < 1040) { + buf->data[buf->offset++] ^= xor_key[i % h.xor_key_len]; + i++; + } + } + buf->offset = h.data_offset; + *decoded_size = h.decoded_size; + *decoded_font = malloc(h.decoded_size); + const unsigned char *encoded_font = buf->data + buf->offset; + const unsigned long encoded_size = buf->maxlen - buf->offset; + if (h.flags & zlib_flag) { + /* unpack */ + int ret = m_uncompress(*decoded_font, (unsigned long *) decoded_size, encoded_font, encoded_size); + if (ret != M_OK) { + buffer_free(buf); + free(*decoded_font); + debug_print("%s", "Font resource decompression failed\n"); + return MOBI_DATA_CORRUPT; + } + if (*decoded_size != h.decoded_size) { + buffer_free(buf); + free(*decoded_font); + debug_print("Decompressed font size (%zu) differs from declared (%i)\n", *decoded_size, h.decoded_size); + return MOBI_DATA_CORRUPT; + } + } else { + memcpy(*decoded_font, encoded_font, encoded_size); + } + + buffer_free(buf); + return MOBI_SUCCESS; +} + +/** + @brief Get resource type (image, font) by checking its magic header + + @param[in] record MOBIPdbRecord structure containing unknown record type + @return MOBIFiletype file type, T_UNKNOWN if not determined, T_BREAK if end of records mark found + */ +MOBIFiletype mobi_determine_resource_type(const MOBIPdbRecord *record) { + /* Kindle supports GIF, BMP, JPG, PNG, SVG images. 
*/ + /* GIF: 47 49 46 38 37 61 (GIF87a), 47 49 46 38 39 61 (GIF89a) */ + /* BMP: 42 4D (BM) + 4 byte file length le */ + /* JPG: FF D8 FF (header) + FF D9 (trailer) */ + /* PNG: 89 50 4E 47 0D 0A 1A 0A */ + /* SVG is XML-based format, so stored in flow parts */ + /* FONT: must be decoded */ + const unsigned char jpg_magic[] = "\xff\xd8\xff"; + const unsigned char gif_magic[] = "\x47\x49\x46\x38"; + const unsigned char png_magic[] = "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"; + const unsigned char bmp_magic[] = "\x42\x4d"; + const unsigned char font_magic[] = FONT_MAGIC; + const unsigned char audio_magic[] = AUDI_MAGIC; + const unsigned char video_magic[] = VIDE_MAGIC; + const unsigned char boundary_magic[] = BOUNDARY_MAGIC; + const unsigned char eof_magic[] = EOF_MAGIC; + if (memcmp(record->data, jpg_magic, 3) == 0) { + return T_JPG; + } else if (memcmp(record->data, gif_magic, 4) == 0) { + return T_GIF; + } else if (memcmp(record->data, png_magic, 8) == 0) { + return T_PNG; + } else if (memcmp(record->data, font_magic, 4) == 0) { + return T_FONT; + } else if (memcmp(record->data, boundary_magic, 8) == 0) { + return T_BREAK; + } else if (memcmp(record->data, eof_magic, 4) == 0) { + return T_BREAK; + } else if (memcmp(record->data, bmp_magic, 2) == 0) { + const size_t bmp_size = (uint32_t) record->data[2] | (uint32_t) record->data[3] << 8 | (uint32_t) record->data[4] << 16 | (uint32_t) record->data[5] << 24; + if (record->size == bmp_size) { + return T_BMP; + } + } else if (memcmp(record->data, audio_magic, 4) == 0) { + return T_AUDIO; + } else if (memcmp(record->data, video_magic, 4) == 0) { + return T_VIDEO; + } + return T_UNKNOWN; +} + +/** + @brief Check if loaded MOBI data is KF7/KF8 hybrid file + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @return true or false + */ +bool mobi_is_hybrid(const MOBIData *m) { if (m == NULL) { - printf("Mobi structure not initialized\n"); - return -1; + debug_print("%s", "Mobi structure not initialized\n"); + 
return false; } - exth_tag = mobi_get_exthtag_by_uid(m, MOBI_EXTH_KF8BOUNDARY); - if (exth_tag != NULL) { - rec_number = * (uint32_t*) exth_tag->data; - if (is_littleendian()) { - rec_number = endian_swap32(rec_number); + if (m->kf8_boundary_offset != MOBI_NOTSET) { + return true; + } + return false; +} + +/** + @brief Check if loaded document is MOBI/BOOK Mobipocket format + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @return true or false + */ +bool mobi_is_mobipocket(const MOBIData *m) { + if (m == NULL || m->ph == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return false; + } + if (strcmp(m->ph->type, "BOOK") == 0 && + strcmp(m->ph->creator, "MOBI") == 0) { + return true; + } + return false; +} + +/** + @brief Check if loaded document is encrypted + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @return true or false + */ +bool mobi_is_encrypted(const MOBIData *m) { + if (m == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return false; + } + if (mobi_is_mobipocket(m) && m->rh && + (m->rh->encryption_type == RECORD0_OLD_ENCRYPTION || + m->rh->encryption_type == RECORD0_MOBI_ENCRYPTION)) { + return true; + } + return false; +} + +/** + @brief Get mobi file version + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @return MOBI document version, 1 if ancient version (no MOBI header) or MOBI_NOTSET if error + */ +size_t mobi_get_fileversion(const MOBIData *m) { + if (m == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_NOTSET; + } + if (m && m->mh && m->mh->version) { + return *m->mh->version; + } + return 1; +} + +/** + @brief Get maximal size of uncompressed text records + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @return Size of text or MOBI_NOTSET if error + */ +size_t mobi_get_text_maxsize(const MOBIData *m) { + if (m && m->rh) { + /* FIXME: is it safe to use data from Record 0 header?
*/ + if (m->rh->text_record_count > 0) { + return (m->rh->text_record_count * RECORD0_TEXT_SIZE_MAX); } + } + return MOBI_NOTSET; +} + +/** + @brief Get sequential number of first resource record (image/font etc) + + @param[in] m MOBIData structure with loaded Record(s) 0 headers + @return Record number or MOBI_NOTSET if not set + */ +size_t mobi_get_first_resource_record(const MOBIData *m) { + /* is it hybrid file? */ + if (mobi_is_hybrid(m) && m->use_kf8) { + /* get first image index from KF7 mobi header */ + if (m->next->mh->image_index) { + return *m->next->mh->image_index; + } + } + /* try to get it from currently set mobi header */ + if (m->mh && m->mh->image_index) { + return *m->mh->image_index; + } + return MOBI_NOTSET; +} + + +/** + @brief Calculate exponentiation for unsigned base and exponent + + @param[in] base Base + @param[in] exp Exponent + @return Result of base raised by the exponent exp + */ +size_t mobi_pow(unsigned base, unsigned exp) { + size_t result = 1; + while(exp) { + if (exp & 1) { + result *= base; + } + exp >>= 1; + base *= base; + } + return result; +} + +/** + @brief Decode positive number from base 32 to base 10. + + Base 32 characters must be upper case. + Maximal supported value is VVVVVV. + + @param[in,out] decoded Base 10 output number + @param[in] encoded Base 32 input number + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_base32_decode(uint32_t *decoded, const char *encoded) { + if (!encoded || !decoded) { + debug_print("Error, null parameter (decoded: %p, encoded: %p)\n", (void *) decoded, (void *) encoded) + return MOBI_PARAM_ERR; + } + /* strip leading zeroes */ + while (*encoded == '0') { + encoded++; + } + size_t encoded_length = strlen(encoded); + /* Let's limit input to 6 chars. 
VVVVVV(32) is 0x3FFFFFFF */ + if (encoded_length > 6) { + debug_print("Base 32 number too big: %s\n", encoded); + return MOBI_PARAM_ERR; + } + const unsigned char *c = (unsigned char *) encoded; + unsigned len = (unsigned) encoded_length; + const unsigned base = 32; + *decoded = 0; + unsigned value; + while (*c) { + /* FIXME: not portable, should we care? */ + if (*c >= 'A' && *c <= 'V') { + value = *c - 'A' + 10; + } + else if (*c >= '0' && *c <= '9') { + value = *c - '0'; + } + else { + debug_print("Illegal character: \"%c\"\n", *c); + return MOBI_DATA_CORRUPT; + } + *decoded += value * mobi_pow(base, --len); + c++; + } + return MOBI_SUCCESS; +} + + +/** + @brief Get offset of KF8 Boundary for KF7/KF8 hybrid file cached in MOBIData structure + + @param[in] m MOBIData structure + @return KF8 Boundary sequential number or zero if not found + */ +size_t mobi_get_kf8offset(const MOBIData *m) { + /* check if we want to parse KF8 part of joint file */ + if (m->use_kf8 && m->kf8_boundary_offset != MOBI_NOTSET) { + return m->kf8_boundary_offset + 1; + } + return 0; +} + +/** + @brief Get sequential number of KF8 Boundary record for KF7/KF8 hybrid file + + This function gets KF8 boundary offset from EXTH header + + @param[in] m MOBIData structure + @return KF8 Boundary record sequential number or MOBI_NOTSET if not found + */ +size_t mobi_get_kf8boundary_seqnumber(const MOBIData *m) { + if (m == NULL) { + debug_print("%s", "Mobi structure not initialized\n"); + return MOBI_NOTSET; + } + const MOBIExthHeader *exth_tag = mobi_get_exthrecord_by_tag(m, EXTH_KF8BOUNDARY); + if (exth_tag != NULL) { + uint32_t rec_number = mobi_decode_exthvalue(exth_tag->data, exth_tag->size); rec_number--; - record = mobi_get_record_by_seqnumber(m, rec_number); + const MOBIPdbRecord *record = mobi_get_record_by_seqnumber(m, rec_number); if (record) { - if(strcmp(record->data, "BOUNDARY") == 0) { + if(memcmp(record->data, "BOUNDARY", 8) == 0) { return rec_number; } } } - return -1; + return 
MOBI_NOTSET; +} + +/** + @brief Loader will parse KF7 part of hybrid file + + @param[in,out] m MOBIData structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_kf7(MOBIData *m) { + if (m == NULL) { + return MOBI_INIT_FAILED; + } + m->use_kf8 = false; + return MOBI_SUCCESS; +} + +/** + @brief Loader will parse KF8 part of hybrid file + + This is the default option. + + @param[in,out] m MOBIData structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_parse_kf8(MOBIData *m) { + if (m == NULL) { + return MOBI_INIT_FAILED; + } + m->use_kf8 = true; + return MOBI_SUCCESS; } -int mobi_swap_mobidata(MOBIData *m) { - MOBIData *tmp; - tmp = malloc(sizeof(MOBIData)); +/** + @brief Swap KF7 and KF8 MOBIData structures in a hybrid file + + MOBIData structures form a circular linked list in case of hybrid files. + By default KF8 structure is first one in the list. + This function puts KF7 structure on the first place, so that it starts to be used by default. + + @param[in,out] m MOBIData structure + @return MOBI_RET status code (on success MOBI_SUCCESS) + */ +MOBI_RET mobi_swap_mobidata(MOBIData *m) { + MOBIData *tmp = malloc(sizeof(MOBIData)); if (tmp == NULL) { - printf("memory allocation failed while swaping data\n"); - return MOBI_ERROR; + debug_print("%s", "Memory allocation failed while swaping data\n"); + return MOBI_MALLOC_FAILED; } tmp->rh = m->rh; tmp->mh = m->mh; diff --git a/src/util.h b/src/util.h index 61b8e8f..4b74a1d 100644 --- a/src/util.h +++ b/src/util.h @@ -1,17 +1,139 @@ -// -// util.h -// mobi -// -// Created by Bartek on 08.04.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/** @file util.h + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. 
+ * See <http://www.gnu.org/licenses/> + */ -#ifndef mobi_util_h -#define mobi_util_h +#ifndef libmobi_util_h +#define libmobi_util_h +#include "config.h" #include "mobi.h" #include "memory.h" +#include "buffer.h" +#include "compression.h" -int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num); -int mobi_swap_mobidata(MOBIData *m); +#if !defined HAVE_STRDUP || !defined __USE_BSD +/** @brief strdup replacement */ +#define strdup mobi_strdup +#endif + +#ifdef USE_MINIZ +#include "miniz.h" +#define m_uncompress mz_uncompress +#define M_OK MZ_OK +#else +#include <zlib.h> +#define m_uncompress uncompress +#define M_OK Z_OK +#endif + +/** @brief Magic numbers of records */ +#define MOBI_MAGIC "MOBI" +#define EXTH_MAGIC "EXTH" +#define HUFF_MAGIC "HUFF" +#define CDIC_MAGIC "CDIC" +#define FDST_MAGIC "FDST" +#define INDX_MAGIC "INDX" +#define TAGX_MAGIC "TAGX" +#define IDXT_MAGIC "IDXT" +#define FONT_MAGIC "FONT" +#define AUDI_MAGIC "AUDI" +#define VIDE_MAGIC "VIDE" +#define BOUNDARY_MAGIC "BOUNDARY" +#define EOF_MAGIC "\xe9\x8e\r\n" +#define REPLICA_MAGIC "%MOP" + +/** @brief Difference in seconds between epoch time and mac time */ +#define EPOCH_MAC_DIFF 2082844800UL + +/** + @defgroup mobi_pdb Params for pdb record header structure + @{ + */ +#define PALMDB_HEADER_LEN 78 /**< Length of header without record info headers */ +#define PALMDB_NAME_SIZE_MAX 32 /**< Max length of db name stored at offset 0 */ +#define PALMDB_RECORD_INFO_SIZE 8 /**< Record info header size of each pdb record */ +/** @} */ + +/** + @defgroup mobi_pdb_defs Default values for pdb record header structure + @{ + */ +#define PALMDB_ATTRIBUTE_DEFAULT 0 +#define PALMDB_VERSION_DEFAULT 0 +#define PALMDB_MODNUM_DEFAULT 0 +#define PALMDB_APPINFO_DEFAULT 0 +#define PALMDB_SORTINFO_DEFAULT 0 +#define PALMDB_TYPE_DEFAULT "BOOK" +#define PALMDB_CREATOR_DEFAULT "MOBI" +#define PALMDB_NEXTREC_DEFAULT 0 +/** @} */ + +/** + @defgroup mobi_rec0 Params for record0 header structure + @{ + */ +#define RECORD0_HEADER_LEN 16 /**< Length of Record 0
header */ +#define RECORD0_NO_COMPRESSION 1 /**< Text record compression type: none */ +#define RECORD0_PALMDOC_COMPRESSION 2 /**< Text record compression type: palmdoc */ +#define RECORD0_HUFF_COMPRESSION 17480 /**< Text record compression type: huff/cdic */ +#define RECORD0_TEXT_SIZE_MAX 4096 /**< Max size of uncompressed text record */ +#define RECORD0_NO_ENCRYPTION 0 /**< Text record encryption type: none */ +#define RECORD0_OLD_ENCRYPTION 1 /**< Text record encryption type: old mobipocket */ +#define RECORD0_MOBI_ENCRYPTION 2 /**< Text record encryption type: mobipocket */ +/** @} */ + +/** + @defgroup mobi_len Header length / size of records + @{ + */ +#define CDIC_HEADER_LEN 16 +#define HUFF_HEADER_LEN 24 +#define HUFF_RECORD_MINSIZE 2584 +#define FONT_HEADER_LEN 24 +#define MEDIA_HEADER_LEN 12 +/** @} */ + +/** + @defgroup mobi_enc Encoding types in MOBI header (offset 28) + @{ + */ +typedef enum { + MOBI_UTF8 = 65001, + MOBI_CP1252 = 1252, +} MOBIEncoding; +/** @} */ + +/** + @defgroup mobi_return Values returned by functions + @{ + */ +#define MOBI_NOTSET UINT32_MAX /**< Value is not set */ +/** @} */ + +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? 
(a) : (b)) + +int mobi_bitcount(uint8_t byte); +MOBI_RET mobi_delete_record_by_seqnumber(MOBIData *m, size_t num); +MOBI_RET mobi_swap_mobidata(MOBIData *m); +char * mobi_strdup(const char *s); +bool mobi_is_cp1252(const MOBIData *m); +MOBI_RET mobi_cp1252_to_utf8(char *output, const char *input, size_t *outsize, const size_t insize); +MOBIPart * mobi_get_part_by_uid(const MOBIRawml *rawml, const size_t uid); +size_t mobi_get_first_resource_record(const MOBIData *m); +MOBIFiletype mobi_determine_resource_type(const MOBIPdbRecord *record); +MOBIFiletype mobi_determine_flowpart_type(const MOBIRawml *rawml, const size_t part_number); +MOBI_RET mobi_base32_decode(uint32_t *decoded, const char *encoded); +MOBIPart * mobi_get_flow_by_uid(const MOBIRawml *rawml, const size_t uid); +MOBIPart * mobi_get_resource_by_uid(const MOBIRawml *rawml, const size_t uid); +MOBIFiletype mobi_get_resourcetype_by_uid(const MOBIRawml *rawml, const size_t uid); +MOBI_RET mobi_add_audio_resource(MOBIPart *part); +MOBI_RET mobi_add_video_resource(MOBIPart *part); +MOBI_RET mobi_add_font_resource(MOBIPart *part); #endif diff --git a/src/write.c b/src/write.c index 050b708..49d35b3 100644 --- a/src/write.c +++ b/src/write.c @@ -1,27 +1,38 @@ -// -// write.c -// mobi -// -// Created by Bartek on 25.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/* + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See <http://www.gnu.org/licenses/> + */ #include #include #include +#include #include "write.h" +#include "util.h" + +/* + I focus my development on reading functions. + Below are just a bunch of quick tests, + which will probably disappear or be thoroughly modified + when I get to coding true writing routines.
+*/ + +/* FIXME test */ +/* buffer should be passed to this func */ MOBIBuffer * serialize_palmdb_header(void) { MOBIBuffer *buf; - size_t len; - char title[PALMDB_NAME_SIZE_MAX]; - strcpy(title, "TITLE"); - len = strlen(title); + const char title[PALMDB_NAME_SIZE_MAX] = "TITLE"; + const size_t len = strlen(title); - uint32_t curtime = (uint32_t)(time(NULL) + EPOCH_MAC_DIFF); - uint32_t uid = 0xff; - uint32_t rec_count = 1; + const uint32_t curtime = (uint32_t) time(NULL); + const uint32_t uid = 0xff; + const uint32_t rec_count = 1; buf = buffer_init(PALMDB_HEADER_LEN); if (buf == NULL) { return NULL; @@ -30,9 +41,9 @@ MOBIBuffer * serialize_palmdb_header(void) { buffer_addzeros(buf, PALMDB_NAME_SIZE_MAX - len); buffer_add16(buf, PALMDB_ATTRIBUTE_DEFAULT); buffer_add16(buf, PALMDB_VERSION_DEFAULT); - buffer_add32(buf, curtime); // ctime - buffer_add32(buf, curtime); // mtime - buffer_add32(buf, 0); // btime + buffer_add32(buf, curtime); /* ctime */ + buffer_add32(buf, curtime); /* mtime */ + buffer_add32(buf, 0); /* btime */ buffer_add32(buf, PALMDB_MODNUM_DEFAULT); buffer_add32(buf, PALMDB_APPINFO_DEFAULT); buffer_add32(buf, PALMDB_SORTINFO_DEFAULT); @@ -44,11 +55,11 @@ MOBIBuffer * serialize_palmdb_header(void) { return buf; } +/* FIXME test */ MOBIBuffer * serialize_record0_header(void) { - MOBIBuffer *buf; uint32_t text_length = 0; uint16_t record_count = 0; - buf = buffer_init(RECORD0_HEADER_LEN); + MOBIBuffer *buf = buffer_init(RECORD0_HEADER_LEN); if (buf == NULL) { return NULL; } @@ -56,7 +67,7 @@ MOBIBuffer * serialize_record0_header(void) { buffer_add16(buf, 0); buffer_add32(buf, text_length); buffer_add16(buf, record_count); - buffer_add16(buf, RECORD0_RECORD_SIZE_MAX); + buffer_add16(buf, RECORD0_TEXT_SIZE_MAX); buffer_add16(buf, RECORD0_NO_ENCRYPTION); buffer_add16(buf, 0); return buf; @@ -70,11 +81,12 @@ void buffer_output(FILE *file, MOBIBuffer *buf) { buffer_free(buf); } -MOBIPdbRecord * build_pdbrecord(size_t offset) { +/* FIXME test */ 
+MOBIPdbRecord * build_pdbrecord(uint32_t offset) { MOBIPdbRecord *record = NULL; record = malloc(sizeof(MOBIPdbRecord)); - record->data = malloc(RECORD0_RECORD_SIZE_MAX); - strncpy(record->data, "test", RECORD0_RECORD_SIZE_MAX); + record->data = malloc(RECORD0_TEXT_SIZE_MAX); + memcpy(record->data, "test", RECORD0_TEXT_SIZE_MAX); if (record->data == NULL) { free(record); return NULL; @@ -87,30 +99,27 @@ MOBIPdbRecord * build_pdbrecord(size_t offset) { } MOBIBuffer * serialize_record_info(MOBIPdbRecord *rec) { - MOBIBuffer *buf; - buf = buffer_init(8); + MOBIBuffer *buf = buffer_init(8); if (buf == NULL) { return NULL; } - buffer_add32(buf, (uint32_t) rec->offset); - //skip attributes, always 0; + buffer_add32(buf, rec->offset); + /* skip attributes, always 0; */ buffer_add32(buf, rec->uid); return buf; } MOBIBuffer * serialize_pdbrecord(MOBIPdbRecord *rec) { - MOBIBuffer *buf; - buf = buffer_init(RECORD0_RECORD_SIZE_MAX); + MOBIBuffer *buf = buffer_init(RECORD0_TEXT_SIZE_MAX); if (buf) { - buffer_addstring(buf, rec->data); + buffer_addraw(buf, rec->data, rec->size); } return buf; } MOBIBuffer * serialize_file_end(void) { - MOBIBuffer *buf; - char end[] = { 233, 142, 13, 10 }; - buf = buffer_init(4); + const unsigned char end[4] = EOF_MAGIC; + MOBIBuffer *buf = buffer_init(4); if (buf) { buffer_addraw(buf, end, 4); } @@ -118,15 +127,17 @@ MOBIBuffer * serialize_file_end(void) { } +/* FIXME test */ void write_mobi(void) { - FILE *file; - MOBIBuffer *buf; - MOBIPdbRecord *rec; - file = fopen("/Users/baf/src/mobi_test/test.mobi","wb"); - buf = serialize_palmdb_header(); + FILE *file = fopen("test.mobi","wb"); + if (file == NULL) { + printf("Could not open file for writing\n"); + return; + } + MOBIBuffer *buf = serialize_palmdb_header(); printf("Writing palmdb header\n"); buffer_output(file, buf); - rec = build_pdbrecord(PALMDB_HEADER_LEN + PDB_RECORD_INFO_SIZE + 2); + MOBIPdbRecord *rec = build_pdbrecord(PALMDB_HEADER_LEN + PALMDB_RECORD_INFO_SIZE + 2); buf = 
serialize_record_info(rec); buf->maxlen += 2; buffer_addzeros(buf, 2); @@ -136,7 +147,7 @@ void write_mobi(void) { printf("Writing record0 header\n"); buffer_output(file, buf); buf = serialize_pdbrecord(rec); - // TODO: improve freeing of rec buffer, see buffer_free + /* TODO: improve freeing of rec buffer, see buffer_free */ free(rec->data); free(rec); printf("Writing pdb record\n"); diff --git a/src/write.h b/src/write.h index 5b96c3b..4eed087 100644 --- a/src/write.h +++ b/src/write.h @@ -1,20 +1,18 @@ -// -// write.h -// mobi -// -// Created by Bartek on 25.03.14. -// Copyright (c) 2014 Bartek. All rights reserved. -// +/* + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * This file is part of libmobi. + * Licensed under LGPL, either version 3, or any later. + * See + */ -#ifndef mobi_write_h -#define mobi_write_h +#ifndef libmobi_write_h +#define libmobi_write_h +#include "config.h" #include "mobi.h" +#include "buffer.h" + -__attribute__((visibility("hidden"))) MOBIBuffer * buffer_init(size_t len); -__attribute__((visibility("hidden"))) void buffer_add8(MOBIBuffer *p, uint8_t data); -__attribute__((visibility("hidden"))) void buffer_add16(MOBIBuffer *p, uint16_t data); -__attribute__((visibility("hidden"))) void buffer_add32(MOBIBuffer *p, uint32_t data); -__attribute__((visibility("hidden"))) void buffer_addstring(MOBIBuffer *p, char *str); -__attribute__((visibility("hidden"))) void buffer_free(MOBIBuffer *p); #endif diff --git a/tools/Makefile.am b/tools/Makefile.am index 07517c2..e8fe898 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,12 +1,12 @@ # tools -# what flags you want to pass to the C compiler & linker -AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2 -AM_LDFLAGS = - # this lists the binaries to produce, the (non-PHONY, binary) targets in # the previous manual Makefile bin_PROGRAMS = mobitool man_MANS = mobitool.1 +AM_CPPFLAGS = -I$(top_builddir)/src mobitool_SOURCES = mobitool.c -mobitool_LDADD = 
$(top_builddir)/src/libmobi.la +mobitool_DEPENDENCIES = $(top_builddir)/src/libmobi.la +mobitool_LDADD = $(top_builddir)/src/libmobi.la +mobitool_CFLAGS = $(ISO99_SOURCE) $(DEBUG_CFLAGS) -D_POSIX_C_SOURCE=200112L +mobitool_LDFLAGS = $(MOBITOOL_STATIC) diff --git a/tools/mobitool.1 b/tools/mobitool.1 index a8dd9a3..3f4ac9a 100644 --- a/tools/mobitool.1 +++ b/tools/mobitool.1 @@ -4,59 +4,44 @@ .\"man mdoc for the short list of editing options .\"/usr/share/misc/mdoc.template .Dd 26.03.14 \" DATE -.Dt test 1 \" Program name and manual section number -.Os Darwin +.Dt mobitool 1 \" Program name and manual section number +.Os Unix .Sh NAME \" Section Header - required - don't modify -.Nm test, +.Nm mobitool .\" The following lines are read in generating the apropos(man -k) database. Use only key .\" words here as the database is built based on the words here and in the .ND line. -.Nm Other_name_for_same_program(), -.Nm Yet another name for the same program. .\" Use .Nm macro to designate other names for the documented program. -.Nd This line parsed for whatis database. +.Nd Tool for handling MOBI format ebook files. .Sh SYNOPSIS \" Section Header - required - don't modify .Nm -.Op Fl abcd \" [-abcd] -.Op Fl a Ar path \" [-a path] -.Op Ar file \" [file] -.Op Ar \" [file ...] -.Ar arg0 \" Underlined argument - use .Ar anywhere to underline -arg2 ... \" Arguments +.Op Fl dmrsu7 \" [-dmrsu7] +.Ar file \" Underlined argument - use .Ar anywhere to underline .Sh DESCRIPTION \" Section Header - required - don't modify -Use the .Nm macro to refer to your program throughout the man page like such: -.Nm -Underlining is accomplished with the .Ar macro like this: -.Ar underlined text . -.Pp \" Inserts a space -A list of items with descriptions: -.Bl -tag -width -indent \" Begins a tagged list -.It item a \" Each item preceded by .It macro -Description of item a -.It item b -Description of item b -.El \" Ends the list +Written as a test case for libmobi library. 
+Run without arguments prints document metadata and exits. .Pp A list of flags and their descriptions: -.Bl -tag -width -indent \" Differs from above in tag removed -.It Fl a \"-a flag as a list item -Description of -a flag -.It Fl b -Description of -b flag +.Bl -tag -width -indent +.It Fl d +dump rawml text record +.It Fl m +print records metadata +.It Fl r +dump raw records +.It Fl s +dump recreated source files +.It Fl u +show version +.It Fl u +show version and exit +.It Fl 7 +parse KF7 part of hybrid file (by default KF8 part is parsed) .El \" Ends the list .Pp -.\" .Sh ENVIRONMENT \" May not be needed -.\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1 -.\" .It Ev ENV_VAR_1 -.\" Description of ENV_VAR_1 -.\" .It Ev ENV_VAR_2 -.\" Description of ENV_VAR_2 -.\" .El .Sh FILES \" File used or created by the topic of the man page .Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact -.It Pa /usr/share/file_name -FILE_1 description -.It Pa /Users/joeuser/Library/really_long_file_name -FILE_2 description +.\"It Pa /usr/local/lib/libmobi.so +.\"libmobi library .El \" Ends the list .\" .Sh DIAGNOSTICS \" May not be needed .\" .Bl -diag @@ -65,15 +50,5 @@ FILE_2 description .\" .It Diagnostic Tag .\" Diagnostic informtion here. .\" .El -.Sh SEE ALSO -.\" List links in ascending order by section, alphabetically within a section. -.\" Please do not reference files that do not exist without filing a bug report -.Xr a 1 , -.Xr b 1 , -.Xr c 1 , -.Xr a 2 , -.Xr b 2 , -.Xr a 3 , -.Xr b 3 -.\" .Sh BUGS \" Document known, unremedied bugs +.\" .Sh BUGS \" Document known, unremedied bugs .\" .Sh HISTORY \" Document history if command behaves in a unique manner \ No newline at end of file diff --git a/tools/mobitool.c b/tools/mobitool.c index 6ad9e0d..67176fe 100644 --- a/tools/mobitool.c +++ b/tools/mobitool.c @@ -1,59 +1,141 @@ -// -// test.c -// mobi -// -// Created by Bartek on 25.03.14. -// Copyright (c) 2014 Bartek. 
All rights reserved. -// +/** @file mobitool.c + * + * @brief mobitool + * + * @example mobitool.c + * Program for testing libmobi library + * + * Copyright (c) 2014 Bartek Fabiszewski + * http://www.fabiszewski.net + * + * Licensed under LGPL, either version 3, or any later. + * See + */ #include -#include "../src/mobi.h" -//#include +#include +#include +#include +#include +#include +#include +/* include libmobi header */ +#include -// FIXME: testing -#define DUMP_REC_OPT 0; -#define LOADFILENAME 1 +#ifdef HAVE_SYS_RESOURCE_H +/* rusage */ +#include +#define PRINT_RUSAGE_ARG "u" +#else +#define PRINT_RUSAGE_ARG "" +#endif +/* return codes */ +#define ERROR 1 +#define SUCCESS 0 -void print_meta(MOBIData *m) { +#if defined(__clang__) +#define COMPILER "clang " __VERSION__ +#elif defined(__GNUC__) +#if (defined(__MINGW32__) || defined(__MINGW64__)) +#define COMPILER "gcc (MinGW) " __VERSION__ +#else +#define COMPILER "gcc " __VERSION__ +#endif +#else +#define COMPILER +#endif + +/* command line options */ +int dump_rawml_opt = 0; +int print_rec_meta_opt = 0; +int dump_rec_opt = 0; +int parse_kf7_opt = 0; +int dump_parts_opt = 0; +int print_rusage_opt = 0; + +#ifdef _WIN32 +const int separator = '\\'; +#else +const int separator = '/'; +#endif + +/** + @brief Parse file name into file path and base name + @param[in] fullpath Full file path + @param[in,out] dirname Will be set to full dirname + @param[in,out] basename Will be set to file basename + */ +void split_fullpath(const char *fullpath, char *dirname, char *basename) { + char *p = strrchr(fullpath, separator); + if (p) { + p += 1; + strncpy(dirname, fullpath, (p - fullpath)); + dirname[p - fullpath] = '\0'; + strncpy(basename, p, strlen(p) + 1); + } + else { + dirname[0] = '\0'; + strncpy(basename, fullpath, strlen(fullpath) + 1); + } + p = strrchr(basename, '.'); + if (p) { + *p = '\0'; + } +} + +/** + @brief Print all loaded headers meta information + @param[in] m MOBIData structure + */ +void print_meta(const 
MOBIData *m) { + /* Full name stored at offset given in MOBI header */ if (m->mh && m->mh->full_name_offset && m->mh->full_name_length) { - char *full_name; size_t len = *m->mh->full_name_length; - full_name = malloc(len + 1); - mobi_get_fullname(m, full_name, len); - printf("full name: %s\n", full_name); - free(full_name); - } - printf("name: %s\n", m->ph->name); - printf("attributes: %hu\n", m->ph->attributes); - printf("version: %hu\n", m->ph->version); - printf("ctime: %u\n", m->ph->ctime); - printf("mtime: %u\n", m->ph->mtime); - printf("mtime: %u\n", m->ph->mtime); - printf("btime: %u\n", m->ph->btime); - printf("mod_num: %u\n", m->ph->mod_num); - printf("appinfo_offset: %u\n", m->ph->appinfo_offset); - printf("sortinfo_offset: %u\n", m->ph->sortinfo_offset); - printf("type: %s\n", m->ph->type); - printf("creator: %s\n", m->ph->creator); - printf("uid: %u\n", m->ph->uid); - printf("next_rec: %u\n", m->ph->next_rec); - printf("rec_count: %u\n", m->ph->rec_count); + char full_name[len + 1]; + if(mobi_get_fullname(m, full_name, len) == MOBI_SUCCESS) { + printf("\nFull name: %s\n", full_name); + } + } + /* Palm database header */ + if (m->ph) { + printf("\nPalm doc header:\n"); + printf("name: %s\n", m->ph->name); + printf("attributes: %hu\n", m->ph->attributes); + printf("version: %hu\n", m->ph->version); + struct tm * timeinfo = mobi_pdbtime_to_time(m->ph->ctime); + printf("ctime: %s", asctime(timeinfo)); + timeinfo = mobi_pdbtime_to_time(m->ph->mtime); + printf("mtime: %s", asctime(timeinfo)); + timeinfo = mobi_pdbtime_to_time(m->ph->btime); + printf("btime: %s", asctime(timeinfo)); + printf("mod_num: %u\n", m->ph->mod_num); + printf("appinfo_offset: %u\n", m->ph->appinfo_offset); + printf("sortinfo_offset: %u\n", m->ph->sortinfo_offset); + printf("type: %s\n", m->ph->type); + printf("creator: %s\n", m->ph->creator); + printf("uid: %u\n", m->ph->uid); + printf("next_rec: %u\n", m->ph->next_rec); + printf("rec_count: %u\n", m->ph->rec_count); + } + /* Record 0 
header */ if (m->rh) { - printf("\nRecord 0:\n"); + printf("\nRecord 0 header:\n"); printf("compresion type: %u\n", m->rh->compression_type); printf("text length: %u\n", m->rh->text_length); - printf("record count: %u\n", m->rh->text_record_count); - printf("record size: %u\n", m->rh->text_record_size); + printf("text record count: %u\n", m->rh->text_record_count); + printf("text record size: %u\n", m->rh->text_record_size); printf("encryption type: %u\n", m->rh->encryption_type); printf("unknown: %u\n", m->rh->unknown1); } + /* Mobi header */ if (m->mh) { + printf("\nMOBI header:\n"); printf("identifier: %s\n", m->mh->mobi_magic); if(m->mh->header_length) { printf("header length: %u\n", *m->mh->header_length); } if(m->mh->mobi_type) { printf("mobi type: %u\n", *m->mh->mobi_type); } if(m->mh->text_encoding) { printf("text encoding: %u\n", *m->mh->text_encoding); } if(m->mh->uid) { printf("unique id: %u\n", *m->mh->uid); } - if(m->mh->file_version) { printf("file version: %u\n", *m->mh->file_version); } + if(m->mh->version) { printf("file version: %u\n", *m->mh->version); } if(m->mh->orth_index) { printf("orth index: %u\n", *m->mh->orth_index); } if(m->mh->infl_index) { printf("infl index: %u\n", *m->mh->infl_index); } if(m->mh->names_index) { printf("names index: %u\n", *m->mh->names_index); } @@ -67,23 +149,32 @@ void print_meta(MOBIData *m) { if(m->mh->non_text_index) { printf("non text index: %u\n", *m->mh->non_text_index); } if(m->mh->full_name_offset) { printf("full name offset: %u\n", *m->mh->full_name_offset); } if(m->mh->full_name_length) { printf("full name length: %u\n", *m->mh->full_name_length); } - if(m->mh->locale) { printf("locale: %u\n", *m->mh->locale); } - if(m->mh->input_lang) { printf("input lang: %u\n", *m->mh->input_lang); } - if(m->mh->output_lang) { printf("outpu lang: %u\n", *m->mh->output_lang); } + if(m->mh->locale) { + const char *locale_string = mobi_get_locale_string(*m->mh->locale); + if (locale_string) { + printf("locale: %s (%u)\n", 
locale_string, *m->mh->locale); + } else { + printf("locale: unknown (%u)\n", *m->mh->locale); + } + } + if(m->mh->dict_input_lang) { printf("input lang: %u\n", *m->mh->dict_input_lang); } + if(m->mh->dict_output_lang) { printf("output lang: %u\n", *m->mh->dict_output_lang); } if(m->mh->min_version) { printf("minimal version: %u\n", *m->mh->min_version); } if(m->mh->image_index) { printf("first image index: %u\n", *m->mh->image_index); } if(m->mh->huff_rec_index) { printf("huffman record offset: %u\n", *m->mh->huff_rec_index); } - if(m->mh->huff_rec_count) { printf("huffman record count: %u\n", *m->mh->huff_rec_count); } - if(m->mh->huff_table_offset) { printf("huffman table offset: %u\n", *m->mh->huff_table_offset); } - if(m->mh->huff_table_length) { printf("huffman table length: %u\n", *m->mh->huff_table_length); } + if(m->mh->huff_rec_count) { printf("huffman records count: %u\n", *m->mh->huff_rec_count); } + if(m->mh->datp_rec_index) { printf("DATP record offset: %u\n", *m->mh->datp_rec_index); } + if(m->mh->datp_rec_count) { printf("DATP records count: %u\n", *m->mh->datp_rec_count); } if(m->mh->exth_flags) { printf("EXTH flags: %u\n", *m->mh->exth_flags); } if(m->mh->unknown6) { printf("unknown: %u\n", *m->mh->unknown6); } if(m->mh->drm_offset) { printf("drm offset: %u\n", *m->mh->drm_offset); } + if(m->mh->drm_count) { printf("drm count: %u\n", *m->mh->drm_count); } if(m->mh->drm_size) { printf("drm size: %u\n", *m->mh->drm_size); } if(m->mh->drm_flags) { printf("drm flags: %u\n", *m->mh->drm_flags); } if(m->mh->first_text_index) { printf("first text index: %u\n", *m->mh->first_text_index); } if(m->mh->last_text_index) { printf("last text index: %u\n", *m->mh->last_text_index); } - if(m->mh->unknown9) { printf("unknown: %u\n", *m->mh->unknown9); } + if(m->mh->fdst_index) { printf("FDST offset: %u\n", *m->mh->fdst_index); } + if(m->mh->fdst_section_count) { printf("FDST count: %u\n", *m->mh->fdst_section_count); } if(m->mh->fcis_index) { printf("FCIS index: 
%u\n", *m->mh->fcis_index); } if(m->mh->fcis_count) { printf("FCIS count: %u\n", *m->mh->fcis_count); } if(m->mh->flis_index) { printf("FLIS index: %u\n", *m->mh->flis_index); } @@ -98,8 +189,11 @@ void print_meta(MOBIData *m) { if(m->mh->ncx_index) { printf("NCX offset: %u\n", *m->mh->ncx_index); } if(m->mh->unknown14) { printf("unknown: %u\n", *m->mh->unknown14); } if(m->mh->unknown15) { printf("unknown: %u\n", *m->mh->unknown15); } + if(m->mh->fragment_index) { printf("fragment index: %u\n", *m->mh->fragment_index); } + if(m->mh->skeleton_index) { printf("skeleton index: %u\n", *m->mh->skeleton_index); } if(m->mh->datp_index) { printf("DATP index: %u\n", *m->mh->datp_index); } if(m->mh->unknown16) { printf("unknown: %u\n", *m->mh->unknown16); } + if(m->mh->guide_index) { printf("guide index: %u\n", *m->mh->guide_index); } if(m->mh->unknown17) { printf("unknown: %u\n", *m->mh->unknown17); } if(m->mh->unknown18) { printf("unknown: %u\n", *m->mh->unknown18); } if(m->mh->unknown19) { printf("unknown: %u\n", *m->mh->unknown19); } @@ -107,32 +201,85 @@ void print_meta(MOBIData *m) { } } -void print_exth(MOBIData *m) { - MOBIExtHeader *curr; +/** + @brief Print all loaded EXTH record tags + @param[in] m MOBIData structure + */ +void print_exth(const MOBIData *m) { if (m->eh == NULL) { return; } - curr = m->eh; + /* Linked list of MOBIExthHeader structures holds EXTH records */ + const MOBIExthHeader *curr = m->eh; + if (curr != NULL) { + printf("\nEXTH records:\n"); + } + uint32_t val32; while (curr != NULL) { - char *str; - uint32_t val; - str = calloc(1, curr->size+1); - strncpy(str, curr->data, curr->size); - val = *(uint32_t*) curr->data; - if (is_littleendian()) { - val = endian_swap32(val); + /* check if it is a known tag and get some more info if it is */ + MOBIExthMeta tag = mobi_get_exthtagmeta_by_tag(curr->tag); + if (tag.tag == 0) { + /* unknown tag */ + /* try to print the record both as string and numeric value */ + char str[curr->size + 1]; + unsigned i = 
0; + unsigned char *p = curr->data; + while (isprint(*p) && i < curr->size) { + str[i] = (char)*p++; + i++; + } + str[i] = '\0'; + val32 = mobi_decode_exthvalue(curr->data, curr->size); + printf("Unknown (%i): %s (%u)\n", curr->tag, str, val32); + } else { + /* known tag */ + unsigned i = 0; + size_t size = curr->size; + char str[2 * size + 1]; + unsigned char *data = curr->data; + switch (tag.type) { + /* numeric */ + case EXTH_NUMERIC: + val32 = mobi_decode_exthvalue(data, size); + printf("%s: %u\n", tag.name, val32); + break; + /* string */ + case EXTH_STRING: + { + char *exth_string = mobi_decode_exthstring(m, data, size); + if (exth_string) { + printf("%s: %s\n", tag.name, exth_string); + free(exth_string); + } + break; + } + /* binary */ + case EXTH_BINARY: + while (size--) { + uint8_t val8 = *data++; + sprintf(&str[i], "%02x", val8); + i += 2; + } + printf("%s: 0x%s\n", tag.name, str); + break; + default: + break; + } + } - printf("id: %i\tval: %s (%u)\tsize: %zu\n", curr->uid, str, val, curr->size); - free(str); curr = curr->next; } } -void print_records_meta(MOBIData *m) { - MOBIPdbRecord *currec; - currec = m->rec; +/** + @brief Print meta data of each document record + @param[in] m MOBIData structure + */ +void print_records_meta(const MOBIData *m) { + /* Linked list of MOBIPdbRecord structures holds records data and metadata */ + const MOBIPdbRecord *currec = m->rec; while (currec != NULL) { - printf("offset: %zu\n", currec->offset); + printf("offset: %u\n", currec->offset); printf("size: %zu\n", currec->size); printf("attributes: %hhu\n", currec->attributes); printf("uid: %u\n", currec->uid); @@ -141,81 +288,305 @@ void print_records_meta(MOBIData *m) { } } -void dump_records(MOBIData *m, char *filepath) { - MOBIPdbRecord *currec; - FILE *file; - char name[FILENAME_MAX]; +/** + @brief Dump each document record to a file + @param[in] m MOBIData structure + @param[in] fullpath File path will be parsed to build basenames of dumped records + */ +void 
dump_records(const MOBIData *m, const char *fullpath) { + char dirname[FILENAME_MAX]; + char basename[FILENAME_MAX]; + split_fullpath(fullpath, dirname, basename); + /* Linked list of MOBIPdbRecord structures holds records data and metadata */ + const MOBIPdbRecord *currec = m->rec; int i = 0; - currec = m->rec; while (currec != NULL) { - sprintf(name, "%spart_%i_uid_%i", filepath, i++, currec->uid); - file = fopen(name, "wb"); + char name[FILENAME_MAX]; + sprintf(name, "%s%s.part_%i_uid_%i", dirname, basename, i++, currec->uid); + FILE *file = fopen(name, "wb"); + if (file == NULL) { + printf("Could not open file for writing: %s\n", name); + return; + } fwrite(currec->data, 1, currec->size, file); fclose(file); currec = currec->next; } } -int dump_rawml(MOBIData *m, char *filepath) { - FILE *file; - int ret; +/** + @brief Dump all text records, decompressed and concatenated, to a single rawml file + @param[in] m MOBIData structure + @param[in] fullpath File path will be parsed to create a new name for saved file + */ +int dump_rawml(const MOBIData *m, const char *fullpath) { + char dirname[FILENAME_MAX]; + char basename[FILENAME_MAX]; + split_fullpath(fullpath, dirname, basename); char name[FILENAME_MAX]; - sprintf(name, "%srawml", filepath); - file = fopen(name, "wb"); - ret = mobi_dump_rawml(m, file); + sprintf(name, "%s%s.rawml", dirname, basename); + FILE *file = fopen(name, "wb"); + if (file == NULL) { + printf("Could not open file for writing: %s\n", name); + return ERROR; + } + const MOBI_RET mobi_ret = mobi_dump_rawml(m, file); fclose(file); - return ret; + if (mobi_ret != MOBI_SUCCESS) { + printf("Dumping rawml file failed (%i)", mobi_ret); + return ERROR; + } + return SUCCESS; } -int loadfilename(const char *filename) { - MOBIData *m; - int ret = 0; - m = mobi_init(); +/** + @brief Dump parsed markup files and resources into created folder + @param[in] rawml MOBIRawml structure holding parsed records + @param[in] fullpath File path will be parsed to 
build basenames of dumped records + */ +int dump_rawml_parts(const MOBIRawml *rawml, const char *fullpath) { + if (rawml == NULL) { + printf("Rawml structure not initialized\n"); + return ERROR; + } + char dirname[FILENAME_MAX]; + char basename[FILENAME_MAX]; + split_fullpath(fullpath, dirname, basename); + char newdir[FILENAME_MAX]; + sprintf(newdir, "%s%s_markup", dirname, basename); + printf("Saving markup to %s\n", newdir); +#ifdef _WIN32 + _mkdir(newdir); +#else + mkdir(newdir, S_IRWXU); +#endif + char partname[FILENAME_MAX]; + if (rawml->markup != NULL) { + /* Linked list of MOBIPart structures in rawml->markup holds main text files */ + MOBIPart *curr = rawml->markup; + while (curr != NULL) { + MOBIFileMeta file_meta = mobi_get_filemeta_by_type(curr->type); + sprintf(partname, "%s%cpart%05zu.%s", newdir, separator, curr->uid, file_meta.extension); + FILE *file = fopen(partname, "wb"); + if (file == NULL) { + printf("Could not open file for writing: %s\n", partname); + return ERROR; + } + printf("part%05zu.%s\n", curr->uid, file_meta.extension); + fwrite(curr->data, 1, curr->size, file); + fclose(file); + curr = curr->next; + } + } + if (rawml->flow != NULL) { + /* Linked list of MOBIPart structures in rawml->flow holds supplementary text files */ + MOBIPart *curr = rawml->flow; + /* skip raw html file */ + curr = curr->next; + while (curr != NULL) { + MOBIFileMeta file_meta = mobi_get_filemeta_by_type(curr->type); + sprintf(partname, "%s%cflow%05zu.%s", newdir, separator, curr->uid, file_meta.extension); + FILE *file = fopen(partname, "wb"); + if (file == NULL) { + printf("Could not open file for writing: %s\n", partname); + return ERROR; + } + printf("flow%05zu.%s\n", curr->uid, file_meta.extension); + fwrite(curr->data, 1, curr->size, file); + fclose(file); + curr = curr->next; + } + } + if (rawml->resources != NULL) { + /* Linked list of MOBIPart structures in rawml->resources holds binary files */ + MOBIPart *curr = rawml->resources; + /* jpg, gif, png, 
bmp, font, audio, video */ + while (curr != NULL) { + MOBIFileMeta file_meta = mobi_get_filemeta_by_type(curr->type); + if (curr->size > 0) { + sprintf(partname, "%s%cresource%05zu.%s", newdir, separator, curr->uid, file_meta.extension); + FILE *file = fopen(partname, "wb"); + if (file == NULL) { + printf("Could not open file for writing: %s\n", partname); + return ERROR; + } + printf("resource%05zu.%s\n", curr->uid, file_meta.extension); + fwrite(curr->data, 1, curr->size, file); + fclose(file); + } + curr = curr->next; + } + } + return SUCCESS; +} + + +/** + @brief Main routine that calls optional subroutines + @param[in] fullpath Full file path + */ +int loadfilename(const char *fullpath) { + MOBI_RET mobi_ret; + int ret = SUCCESS; + /* Initialize main MOBIData structure */ + MOBIData *m = mobi_init(); if (m == NULL) { - printf("init failed\n"); - return 1; + printf("Memory allocation failed\n"); + return ERROR; } - char filepath[FILENAME_MAX]; - char *p = strrchr(filename, '/'); - if (p) { - p += 1; - strncpy(filepath, filename, (p - filename)); - filepath[p - filename] = '\0'; + /* By default loader will parse KF8 part of hybrid KF7/KF8 file */ + if (parse_kf7_opt) { + /* Force it to parse KF7 part */ + mobi_parse_kf7(m); } - else { - filepath[0] = '\0'; + FILE *file = fopen(fullpath, "rb"); + if (file == NULL) { + printf("Error opening file: %s\n", fullpath); + mobi_free(m); + return ERROR; } - m->use_kf8 = MOBI_USE_KF7; - ret = mobi_load_filename(m, filename); + /* MOBIData structure will be filled with loaded document data and metadata */ + mobi_ret = mobi_load_file(m, file); + fclose(file); + /* Try to print basic metadata, even if further loading failed */ + /* In case of some unsupported formats it may still print some useful info */ print_meta(m); - if (ret == MOBI_ERROR) { + if (mobi_ret != MOBI_SUCCESS) { + printf("Error while loading document (%i)\n", mobi_ret); mobi_free(m); - return 1; + return ERROR; } + /* Try to print EXTH metadata */ 
print_exth(m); - print_records_meta(m); - int dump_rec_opt = DUMP_REC_OPT; + if (print_rec_meta_opt) { + printf("\nPrinting records metadata...\n"); + print_records_meta(m); + } if (dump_rec_opt) { - dump_records(m, filepath); + printf("\nDumping raw records...\n"); + dump_records(m, fullpath); + } + if (dump_rawml_opt) { + printf("\nDumping rawml...\n"); + ret = dump_rawml(m, fullpath); + } else if (dump_parts_opt) { + printf("\nReconstructing source resources...\n"); + /* Initialize MOBIRawml structure */ + /* This structure will be filled with parsed records data */ + MOBIRawml *rawml = mobi_init_rawml(m); + if (rawml == NULL) { + printf("Memory allocation failed\n"); + mobi_free(m); + return ERROR; + } + /* Parse rawml text and other data held in MOBIData structure into MOBIRawml structure */ + mobi_ret = mobi_parse_rawml(rawml, m); + if (mobi_ret != MOBI_SUCCESS) { + printf("Parsing rawml failed (%i)\n", mobi_ret); + mobi_free(m); + mobi_free_rawml(rawml); + return ERROR; + } + printf("\ndumping resources...\n"); + /* Save parts to files */ + ret = dump_rawml_parts(rawml, fullpath); + if (ret != SUCCESS) { + printf("Dumping parts failed\n"); + } + /* Free MOBIRawml structure */ + mobi_free_rawml(rawml); } - ret = dump_rawml(m, filepath); + /* Free MOBIData structure */ mobi_free(m); return ret; } +/** + @brief Print usage info + @param[in] progname Executed program name + */ +void usage(const char *progname) { + printf("usage: %s [-dmrs" PRINT_RUSAGE_ARG "v7] filename\n", progname); + printf(" without arguments prints document metadata and exits\n"); + printf(" -d dump rawml text record\n"); + printf(" -m print records metadata\n"); + printf(" -r dump raw records\n"); + printf(" -s dump recreated source files\n"); +#ifdef HAVE_SYS_RESOURCE_H + printf(" -u show rusage\n"); +#endif + printf(" -v show version and exit\n"); + printf(" -7 parse KF7 part of hybrid file (by default KF8 part is parsed)\n"); + exit(0); +} +/** + @brief Main + */ int main(int argc, char 
*argv[]) { - if (argc != 2) { - printf("usage: %s filename\n", argv[0]); - return 1; + if (argc < 2) { + usage(argv[0]); + } + opterr = 0; + int c; + while((c = getopt(argc, argv, "dmrs" PRINT_RUSAGE_ARG "v7")) != -1) + switch(c) { + case 'd': + dump_rawml_opt = 1; + break; + case 'm': + print_rec_meta_opt = 1; + break; + case 'r': + dump_rec_opt = 1; + break; + case 's': + dump_parts_opt = 1; + break; +#ifdef HAVE_SYS_RESOURCE_H + case 'u': + print_rusage_opt = 1; + break; +#endif + case 'v': + printf("mobitool build: " __DATE__ " " __TIME__ " (" COMPILER ")\n"); + printf("libmobi: %s\n", mobi_version()); + return 0; + break; + case '7': + parse_kf7_opt = 1; + break; + case '?': + if (isprint(optopt)) { + fprintf(stderr, "Unknown option `-%c'\n", optopt); + } + else { + fprintf(stderr, "Unknown option character `\\x%x'\n", optopt); + } + usage(argv[0]); + default: + usage(argv[0]); + } + if (argc <= optind) { + printf("Missing filename\n"); + usage(argv[0]); } - int command = LOADFILENAME; int ret = 0; char filename[FILENAME_MAX]; - strncpy(filename, argv[1], FILENAME_MAX - 1); - switch (command) { - case LOADFILENAME: - ret = loadfilename(filename); - break; + strncpy(filename, argv[optind], FILENAME_MAX - 1); + ret = loadfilename(filename); +#ifdef HAVE_SYS_RESOURCE_H + if (print_rusage_opt) { + /* rusage */ + struct rusage ru; + struct timeval utime; + struct timeval stime; + getrusage(RUSAGE_SELF, &ru); + utime = ru.ru_utime; + stime = ru.ru_stime; + printf("RUSAGE: ru_utime => %lld.%lld sec.; ru_stime => %lld.%lld sec.\n", + (long long) utime.tv_sec, (long long) utime.tv_usec, + (long long) stime.tv_sec, (long long) stime.tv_usec); } +#endif return ret; }