From fea948631b1a3512c21032e834a044ac8fbb5c09 Mon Sep 17 00:00:00 2001 From: Bartek Fabiszewski Date: Fri, 11 Apr 2014 18:47:37 +0200 Subject: [PATCH] initial commit --- AUTHORS | 0 ChangeLog | 0 Makefile.am | 8 + NEWS | 0 README | 0 autogen.sh | 2 + configure.ac | 44 ++++ src/Makefile.am | 11 + src/buffer.c | 191 ++++++++++++++++ src/buffer.h | 42 ++++ src/compression.c | 139 ++++++++++++ src/compression.h | 29 +++ src/debug.c | 38 ++++ src/debug.h | 27 +++ src/memory.c | 156 +++++++++++++ src/memory.h | 23 ++ src/mobi.h | 253 +++++++++++++++++++++ src/read.c | 557 ++++++++++++++++++++++++++++++++++++++++++++++ src/read.h | 21 ++ src/util.c | 331 +++++++++++++++++++++++++++ src/util.h | 17 ++ src/write.c | 149 +++++++++++++ src/write.h | 20 ++ tools/Makefile.am | 12 + tools/mobitool.1 | 79 +++++++ tools/mobitool.c | 221 ++++++++++++++++++ 26 files changed, 2370 insertions(+) create mode 100644 AUTHORS create mode 100644 ChangeLog create mode 100644 Makefile.am create mode 100644 NEWS create mode 100644 README create mode 100755 autogen.sh create mode 100644 configure.ac create mode 100644 src/Makefile.am create mode 100644 src/buffer.c create mode 100644 src/buffer.h create mode 100644 src/compression.c create mode 100644 src/compression.h create mode 100644 src/debug.c create mode 100644 src/debug.h create mode 100644 src/memory.c create mode 100644 src/memory.h create mode 100644 src/mobi.h create mode 100644 src/read.c create mode 100644 src/read.h create mode 100644 src/util.c create mode 100644 src/util.h create mode 100644 src/write.c create mode 100644 src/write.h create mode 100644 tools/Makefile.am create mode 100644 tools/mobitool.1 create mode 100644 tools/mobitool.c diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..e69de29 diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e69de29 diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..c6e73f5 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,8 @@ +# 
project Makefile.am + +SUBDIRS = src tools +ACLOCAL_AMFLAGS = -I m4 +# what flags you want to pass to the C compiler & linker +AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2 +AM_LDFLAGS = + diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..e69de29 diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..b483139 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,2 @@ +#!/bin/sh +autoreconf --force --install -I m4 diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..61d395d --- /dev/null +++ b/configure.ac @@ -0,0 +1,44 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([1.10]) +AC_INIT([libmobi], [0.1]) +AC_CONFIG_SRCDIR([src/buffer.c]) + +# Enable automake +AM_INIT_AUTOMAKE([-Wall -Werror foreign]) +# all defined C macros (HAVE_*) will be saved to this file +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_MACRO_DIR([m4]) + +# Checks for programs. +AC_PROG_CC +AC_PROG_INSTALL +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) + +# Init libtool +LT_INIT + +# Checks for libraries. + +# Checks for header files. +AC_CHECK_HEADERS([stdlib.h string.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_TYPE_INT32_T +AC_TYPE_INT8_T +AC_TYPE_SIZE_T +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_TYPE_UINT8_T + +# Checks for library functions. 
+AC_FUNC_MALLOC
+AC_FUNC_REALLOC
+AC_CHECK_FUNCS([memset strrchr])
+
+AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([src/Makefile])
+AC_CONFIG_FILES([tools/Makefile])
+AC_OUTPUT
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..aebc615
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,11 @@
+# libmobi
+
+# what flags you want to pass to the C compiler & linker
+AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2
+AM_LDFLAGS =
+
+# this lists the binaries to produce, the (non-PHONY, binary) targets in
+# the previous manual Makefile
+lib_LTLIBRARIES = libmobi.la
+libmobi_la_SOURCES = buffer.c compression.c debug.c memory.c read.c util.c write.c \
+    buffer.h compression.h debug.h memory.h mobi.h read.h util.h write.h
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..6d86244
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,191 @@
+//
+// buffer.c
+// mobi
+//
+// Created by Bartek on 27.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+// NOTE(review): the header name of the next include was lost when this patch
+// was extracted; <string.h> is inferred from the memcpy/memset/strlen/strncpy
+// calls below - confirm against the repository.
+#include <string.h>
+#include "buffer.h"
+
+#define MAX_BUFFER_SIZE 4096
+
+// Allocate a buffer able to hold len bytes, with the cursor at offset 0.
+// Returns NULL (after printing a message) when either allocation fails.
+// Release the result with buffer_free().
+MOBIBuffer * buffer_init(size_t len) {
+    MOBIBuffer *p = malloc(sizeof(MOBIBuffer));
+    if (p == NULL) {
+        printf("Buffer allocation failed\n");
+        return NULL;
+    }
+    p->data = malloc(len);
+    if (p->data == NULL) {
+        free(p);
+        printf("Buffer data allocation failed\n");
+        return NULL;
+    }
+    p->offset = 0;
+    p->maxlen = len;
+    return p;
+}
+
+// Append one byte at the cursor. Prints "Buffer full" and writes nothing
+// when the byte would not fit below maxlen.
+void buffer_add8(MOBIBuffer *p, uint8_t data) {
+    if (p->offset + 1 > p->maxlen) {
+        printf("Buffer full\n");
+        return;
+    }
+    p->data[p->offset++] = data;
+}
+
+// Append a 16-bit value, most significant byte first.
+void buffer_add16(MOBIBuffer *p, uint16_t data) {
+    if (p->offset + 2 > p->maxlen) {
+        printf("Buffer full\n");
+        return;
+    }
+    p->data[p->offset++] = (data & 0xff00) >> 8;
+    p->data[p->offset++] = (data & 0xff);
+}
+
+// Append a 32-bit value, most significant byte first, so that buffer_get32()
+// reads back the same value.
+void buffer_add32(MOBIBuffer *p, uint32_t data) {
+    if (p->offset + 4 > p->maxlen) {
+        printf("Buffer full\n");
+        return;
+    }
+    // each byte is moved down to bits 0..7: shifts are 24, 16, 8 and 0
+    // (the previous shifts of 16 and 12 stored truncated garbage)
+    p->data[p->offset++] = (data & 0xff000000) >> 24;
+    p->data[p->offset++] = (data & 0xff0000) >> 16;
+    p->data[p->offset++] = (data & 0xff00) >> 8;
+    p->data[p->offset++] = (data & 0xff);
+}
+
+// Append len raw bytes from buf. Nothing is written if they do not fit.
+void buffer_addraw(MOBIBuffer *p, char* buf, size_t len) {
+    if (p->offset + len > p->maxlen) {
+        printf("Buffer full\n");
+        return;
+    }
+    memcpy(p->data + p->offset, buf, len);
+    p->offset += len;
+}
+
+// Append the characters of str, without its terminating NUL.
+void buffer_addstring(MOBIBuffer *p, char *str) {
+    buffer_addraw(p, str, strlen(str));
+}
+
+// Append count zero bytes. Nothing is written if they do not fit.
+void buffer_addzeros(MOBIBuffer *p, size_t count) {
+    if (p->offset + count > p->maxlen) {
+        printf("Buffer full\n");
+        return;
+    }
+    memset(p->data + p->offset, 0, count);
+    p->offset += count;
+}
+
+// Read one byte and advance the cursor.
+// Returns 0 and prints "End of buffer" when no byte is left below maxlen.
+uint8_t buffer_get8(MOBIBuffer *p) {
+    if (p->offset + 1 > p->maxlen) {
+        printf("End of buffer\n");
+        return 0;
+    }
+    return (uint8_t) p->data[p->offset++];
+}
+
+// Read a big-endian 16-bit value and advance the cursor by 2.
+// Returns 0 and prints "End of buffer" when fewer than 2 bytes are left.
+uint16_t buffer_get16(MOBIBuffer *p) {
+    if (p->offset + 2 > p->maxlen) {
+        printf("End of buffer\n");
+        return 0;
+    }
+    uint16_t val;
+    val = (uint8_t) p->data[p->offset] << 8 | (uint8_t) p->data[p->offset + 1];
+    p->offset += 2;
+    return val;
+}
+
+// Read a big-endian 32-bit value and advance the cursor by 4.
+// Returns 0 and prints "End of buffer" when fewer than 4 bytes are left.
+uint32_t buffer_get32(MOBIBuffer *p) {
+    if (p->offset + 4 > p->maxlen) {
+        printf("End of buffer\n");
+        return 0;
+    }
+    uint32_t val;
+    // widen to uint32_t before shifting: a uint8_t promotes to int, and
+    // shifting a value >= 0x80 left by 24 overflows a signed int
+    val = (uint32_t) (uint8_t) p->data[p->offset] << 24 |
+          (uint32_t) (uint8_t) p->data[p->offset + 1] << 16 |
+          (uint32_t) (uint8_t) p->data[p->offset + 2] << 8 |
+          (uint32_t) (uint8_t) p->data[p->offset + 3];
+    p->offset += 4;
+    return val;
+}
+
+// Copy len characters at the cursor into str with strncpy and advance by len.
+// strncpy stops copying at the first NUL and zero-pads the rest; it does not
+// terminate str when all len bytes are non-zero, so str needs room for a
+// terminator written by the caller. str is left untouched if fewer than len
+// bytes remain.
+void buffer_getstring(char *str, MOBIBuffer *p, size_t len) {
+    if (p->offset + len > p->maxlen) {
+        printf("End of buffer\n");
+        return;
+    }
+    strncpy(str, p->data + p->offset, len);
+    p->offset += len;
+}
+
+// Copy len raw bytes at the cursor into ptr and advance by len.
+// ptr is left untouched if fewer than len bytes remain.
+void buffer_getraw(void *ptr, MOBIBuffer *p, size_t len) {
+    if (p->offset + len > p->maxlen) {
+        printf("End of buffer\n");
+        return;
+    }
+    memcpy(ptr, p->data + p->offset, len);
+    p->offset += len;
+}
+
+// Read one byte into a newly malloc'ed uint8_t stored in *val.
+// *val is NULL when the buffer is exhausted or the allocation fails;
+// otherwise the caller owns the allocation and frees it.
+void buffer_copy8(uint8_t **val, MOBIBuffer *p) {
+    *val = NULL;
+    if (p->offset + 1 > p->maxlen) {
+        return;
+    }
+    *val = malloc(sizeof(uint8_t));
+    if (*val == NULL) {
+        return;
+    }
+    **val = (uint8_t) p->data[p->offset++];
+}
+
+// Read a big-endian 16-bit value into a newly malloc'ed uint16_t stored in
+// *val. *val is NULL when fewer than 2 bytes remain or the allocation fails.
+void buffer_copy16(uint16_t **val, MOBIBuffer *p) {
+    *val = NULL;
+    if (p->offset + 2 > p->maxlen) {
+        return;
+    }
+    *val = malloc(sizeof(uint16_t));
+    if (*val == NULL) {
+        return;
+    }
+    **val = (uint8_t) p->data[p->offset] << 8 | (uint8_t) p->data[p->offset + 1];
+    p->offset += 2;
+}
+
+// Read a big-endian 32-bit value into a newly malloc'ed uint32_t stored in
+// *val. *val is NULL when fewer than 4 bytes remain or the allocation fails.
+void buffer_copy32(uint32_t **val, MOBIBuffer *p) {
+    *val = NULL;
+    if (p->offset + 4 > p->maxlen) {
+        return;
+    }
+    *val = malloc(sizeof(uint32_t));
+    if (*val == NULL) {
+        return;
+    }
+    // same widening as in buffer_get32() to keep the << 24 well defined
+    **val = (uint32_t) (uint8_t) p->data[p->offset] << 24 |
+            (uint32_t) (uint8_t) p->data[p->offset + 1] << 16 |
+            (uint32_t) (uint8_t) p->data[p->offset + 2] << 8 |
+            (uint32_t) (uint8_t) p->data[p->offset + 3];
+    p->offset += 4;
+}
+
+// Returns 1 when the lowest-addressed byte of the 32-bit value 1 is 1,
+// i.e. on a little-endian host, and 0 otherwise.
+int is_littleendian() {
+    volatile uint32_t i = 1;
+    return (*((uint8_t*)(&i))) == 1;
+}
+
+// Reverse the byte order of a 32-bit value.
+uint32_t endian_swap32(uint32_t x) {
+    return
+    (x & 0xff) << 24 |
+    (x & 0xff00) << 8 |
+    (x & 0xff0000) >> 8 |
+    (x & 0xff000000) >> 24;
+}
+
+// Release a buffer created by buffer_init(). Passing NULL is allowed.
+void buffer_free(MOBIBuffer *p) {
+    if (p == NULL) return;
+
+    // free(NULL) is a no-op, so data needs no separate check
+    free(p->data);
+    free(p);
+}
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 0000000..88b78cf
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,42 @@
+//
+// buffer.h
+// mobi
+//
+// Created by Bartek on 27.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_buffer_h
+#define mobi_buffer_h
+
+// NOTE(review): the header names of the next two includes were lost when this
+// patch was extracted; <stdlib.h> (size_t, malloc/free) and <stdint.h>
+// (uint8_t/uint16_t/uint32_t) are inferred from usage - confirm against the
+// repository.
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "mobi.h"
+
+// Byte buffer with a read/write cursor.
+typedef struct {
+    char *data;     // storage, allocated by buffer_init()
+    size_t offset;  // cursor: index of the next byte to read or write
+    size_t maxlen;  // reads and writes must end at or before this index
+} MOBIBuffer;
+
+// allocation
+MOBIBuffer * buffer_init(size_t len);
+// writers: append at the cursor, big-endian for multi-byte values
+void buffer_add8(MOBIBuffer *p, uint8_t data);
+void buffer_add16(MOBIBuffer *p, uint16_t data);
+void buffer_add32(MOBIBuffer *p, uint32_t data);
+void buffer_addraw(MOBIBuffer *p, char* buf, size_t len);
+void buffer_addstring(MOBIBuffer *p, char *str);
+void buffer_addzeros(MOBIBuffer *p, size_t count);
+// readers: return the value at the cursor and advance it
+uint8_t buffer_get8(MOBIBuffer *p);
+uint16_t buffer_get16(MOBIBuffer *p);
+uint32_t buffer_get32(MOBIBuffer *p);
+// readers that store the value in a newly malloc'ed object (*val)
+void buffer_copy8(uint8_t **val, MOBIBuffer *p);
+void buffer_copy16(uint16_t **val, MOBIBuffer *p);
+void buffer_copy32(uint32_t **val, MOBIBuffer *p);
+void buffer_getstring(char *str, MOBIBuffer *p, size_t len);
+void buffer_getraw(void *ptr, MOBIBuffer *p, size_t len);
+void buffer_free(MOBIBuffer *p);
+// byte-order helpers
+int is_littleendian();
+uint32_t endian_swap32(uint32_t x);
+
+#endif
diff --git a/src/compression.c b/src/compression.c
new file mode 100644
index 0000000..a50f6e2
--- /dev/null
+++ b/src/compression.c
@@ -0,0 +1,139 @@
+//
+// compression.c
+// mobi
+//
+// Created by Bartek on 27.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+// NOTE(review): the header names of the next two includes were lost when this
+// patch was extracted; <stdlib.h> and <string.h> (memcpy) are inferred from
+// usage - confirm against the repository.
+#include <stdlib.h>
+#include <string.h>
+
+#include "compression.h"
+#include "mobi.h"
+
+// Upper bound on nested dictionary symbols in mobi_decompress_huffman().
+// The value is an arbitrary safety limit chosen in review, not a format constant.
+#define MOBI_HUFFMAN_MAXDEPTH 20
+
+// PalmDOC version of LZ77 compression
+// Decompressor based on this algorithm:
+// http://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#PalmDoc
+//
+// Decompresses len bytes from in into out and returns the number of bytes
+// written. There is no output length parameter, so the caller must provide an
+// out buffer large enough for the decompressed data. Decoding stops early if
+// the input ends in the middle of a two-byte pair.
+size_t mobi_decompress_lz77(char *out, const char *in, size_t len) {
+    const char *in_end = in + len;
+    char *out_start = out;
+    while (in < in_end) {
+        uint8_t val = (uint8_t) in[0];
+        // byte pair: space + char
+        if (val >= 0xc0) {
+            *(out++) = ' ';
+            *(out++) = val ^ 0x80;
+            in++;
+        }
+        // length, distance pair
+        // 0x8000 + (distance << 3) + ((length-3) & 0x07)
+        else if (val >= 0x80) {
+            if (in_end - in < 2) {
+                // second byte of the pair is missing: truncated input
+                break;
+            }
+            uint8_t next = (uint8_t) in[1];
+            // use the unsigned byte values: in[0] is negative here when char
+            // is signed, and left-shifting a negative value is undefined
+            uint16_t distance = (((val << 8) | next) >> 3) & 0x7ff;
+            uint8_t length = (next & 0x7) + 3;
+            // byte-wise copy on purpose: source and destination may overlap
+            while (length-- > 0) {
+                *(out) = *(out - distance);
+                out++;
+            }
+            in += 2;
+        }
+        // single char, not modified
+        else if (val >= 0x09) {
+            *(out++) = *(in++);
+        }
+        // n chars not modified
+        else if (val >= 0x01) {
+            size_t count = val;
+            in++;
+            // never read past the end of the input
+            if (count > (size_t) (in_end - in)) {
+                count = (size_t) (in_end - in);
+            }
+            memcpy(out, in, count);
+            out += count;
+            in += count;
+        }
+        // char '\0', not modified
+        else {
+            *(out++) = *(in++);
+        }
+    }
+    return (size_t) (out - out_start);
+}
+
+// Pack up to 8 bytes of in into a 64-bit value, first byte in the most
+// significant position. Bytes beyond len are taken as zero.
+uint64_t _fill_buffer(const char *in, size_t len) {
+    uint32_t in1 = 0L;
+    uint32_t in2 = 0L;
+    len = (len < 8) ? len : 8;
+    size_t i = 0;
+    // bytes 0..3 form the high word
+    while (i < len && i < 4) {
+        in1 |= (uint32_t) (uint8_t) in[i] << ((3-i) * 8);
+        i++;
+    }
+    // bytes 4..7 form the low word; the shift counts down from 24 for i == 4
+    // (with (3-i) the size_t subtraction wrapped around and the shift was undefined)
+    while (i < len) {
+        in2 |= (uint32_t) (uint8_t) in[i] << ((7-i) * 8);
+        i++;
+    }
+    return (uint64_t) in1 << 32 | in2;
+}
+
+int shortcnt = 0;
+
+// Mobi version of Huffman coding
+// Decompressor and HUFF/CDIC records parsing based on:
+// perl EBook::Tools::Mobipocket
+// python mobiunpack.py, calibre
+//
+// Decompresses len bytes from in into out using the tables in huffcdic and
+// returns the number of bytes written. Dictionary symbols that are themselves
+// compressed are expanded recursively; depth is the current nesting level
+// (the recursive calls below pass depth + 1).
+// Returns 0 when depth exceeds MOBI_HUFFMAN_MAXDEPTH and stops early when a
+// code length outside 1..32 is found.
+// The caller must provide an out buffer large enough for the result.
+size_t mobi_decompress_huffman(char *out, const char *in, size_t len, MOBIHuffCdic *huffcdic, size_t depth) {
+    char *out_start = out;
+    int8_t bitcount = 32;
+    int32_t bitsleft = (int32_t) len * 8;
+    uint32_t t1, offset;
+    uint32_t code, maxcode, symbol_length;
+    uint8_t code_length = 0, i;
+    uint32_t index;
+    uint64_t buffer;
+    if (depth > MOBI_HUFFMAN_MAXDEPTH) {
+        // symbols referencing each other would otherwise recurse without end
+        return 0;
+    }
+    buffer = _fill_buffer(in, len);
+    while (1) {
+        if (bitcount <= 0) {
+            bitcount += 32;
+            in += 4;
+            buffer = _fill_buffer(in, (bitsleft + (8 - 1)) / 8);
+        }
+        code = (buffer >> bitcount) & 0xffffffff;
+        // lookup code in table1
+        t1 = huffcdic->table1[code >> 24];
+        // get codelen from t1
+        code_length = t1 & 0x1f;
+        // check termination bit
+        if (t1 & 0x80) {
+            if (code_length == 0) {
+                // would shift by 32 below
+                break;
+            }
+            // get maxcode from t1
+            maxcode = (((t1 >> 8) + 1) << (32 - code_length)) - 1;
+        } else {
+            // get offset from mincode, maxcode tables (33 entries each)
+            while (code_length <= 32 && code < huffcdic->mincode_table[code_length]) {
+                code_length++;
+            }
+            if (code_length == 0 || code_length > 32) {
+                // no table entry matches this code
+                break;
+            }
+            maxcode = huffcdic->maxcode_table[code_length];
+        }
+        bitcount -= code_length;
+        bitsleft -= code_length;
+        if (bitsleft < 0) {
+            break;
+        }
+        // get index for symbol offset
+        index = (maxcode - code) >> (32 - code_length);
+        // check which part of cdic to use
+        i = index >> huffcdic->code_length;
+        // get offset
+        offset = huffcdic->symbol_offsets[index];
+        symbol_length = (uint8_t) huffcdic->symbols[i][offset] << 8 | (uint8_t) huffcdic->symbols[i][offset + 1];
+        // 1st bit is is_decompressed flag
+        int is_decompressed = symbol_length >> 15;
+        // get rid of flag
+        symbol_length &= 0x7fff;
+        if (is_decompressed) {
+            memcpy(out, (huffcdic->symbols[i] + offset + 2), symbol_length);
+            out += symbol_length;
+        } else {
+            // symbol is compressed
+            // TODO cache uncompressed symbols?
+            out += mobi_decompress_huffman(out, (huffcdic->symbols[i] + offset + 2), (symbol_length), huffcdic, depth + 1);
+        }
+    }
+    return (size_t) (out - out_start);
+
+}
diff --git a/src/compression.h b/src/compression.h
new file mode 100644
index 0000000..3d1678c
--- /dev/null
+++ b/src/compression.h
@@ -0,0 +1,29 @@
+//
+// compression.h
+// mobi
+//
+// Created by Bartek on 27.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_lz77_h
+#define mobi_lz77_h
+
+// NOTE(review): the header names of the next two includes were lost when this
+// patch was extracted; <stdlib.h> (size_t) and <stdint.h> (uint16_t/uint32_t)
+// are inferred from usage - confirm against the repository.
+#include <stdlib.h>
+#include <stdint.h>
+
+// Tables used by mobi_decompress_huffman().
+typedef struct {
+    size_t index_count;
+    size_t index_read;
+    size_t code_length;          // shift applied to a symbol index to select an entry of symbols
+    uint32_t table1[256];        // indexed by the top 8 bits of the current code
+    uint32_t mincode_table[33];  // indexed by code length
+    uint32_t maxcode_table[33];  // indexed by code length
+    uint16_t *symbol_offsets;    // offset of each symbol inside its symbols entry
+    char **symbols;
+} MOBIHuffCdic;
+
+size_t mobi_decompress_lz77(char *out, const char *in, size_t len);
+size_t mobi_decompress_huffman(char *out, const char *in, size_t len, MOBIHuffCdic *huffcdic, size_t depth);
+
+#endif
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..1767df1
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,38 @@
+//
+// debug.c
+// mobi
+//
+// Created by Bartek on 02.04.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#include "debug.h"
+
+// NOTE(review): the header name of the next include was lost when this patch
+// was extracted; <stdio.h> is inferred from the printf calls below - confirm
+// against the repository.
+#include <stdio.h>
+
+#if MOBI_DEBUG
+// debug
+// Logging wrappers around the allocator. debug.h redefines free/malloc/
+// realloc/calloc as macros that call these with __FILE__ and __LINE__; the
+// parenthesised names below, e.g. (free), bypass those macros and reach the
+// real library functions.
+void debug_free(void *ptr, char *file, int line){
+    printf("%s:%d: free(%p)\n",file, line, ptr);
+    (free)(ptr);
+}
+
+void *debug_malloc(size_t size, char *file, int line) {
+    void *ptr = (malloc)(size);
+    // %zu prints the full size_t instead of truncating it to int
+    printf("%s:%d: malloc(%zu)=%p\n", file, line, size, ptr);
+    return ptr;
+}
+
+void *debug_realloc(void *ptr, size_t size, char *file, int line) {
+    // the old pointer is printed before the call, the new one after it
+    printf("%s:%d: realloc(%p", file, line, ptr);
+    void *rptr = (realloc)(ptr, size);
+    printf(", %zu)=%p\n", size, rptr);
+    return rptr;
+}
+
+void *debug_calloc(size_t num, size_t size, char *file, int line) {
+    void *ptr = (calloc)(num, size);
+    printf("%s:%d: calloc(%zu, %zu)=%p\n", file, line, num, size, ptr);
+    return ptr;
+}
+#endif
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 0000000..c10c5e6
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,27 @@
+//
+// debug.h
+// mobi
+//
+// Created by Bartek on 02.04.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_debug_h
+#define mobi_debug_h
+
+// NOTE(review): the header name of the next include was lost when this patch
+// was extracted; <stdlib.h> is inferred from the allocator functions wrapped
+// below - confirm against the repository.
+#include <stdlib.h>
+
+// Set to 1 to route every free/malloc/realloc/calloc call in files including
+// this header through the logging wrappers defined in debug.c.
+#define MOBI_DEBUG 0
+#if MOBI_DEBUG
+#define free(x) debug_free(x,__FILE__,__LINE__)
+void debug_free(void *ptr, char *file, int line);
+#define malloc(x) debug_malloc(x, __FILE__, __LINE__ )
+void *debug_malloc(size_t size, char *file, int line);
+#define realloc(x, y) debug_realloc(x, y, __FILE__, __LINE__ )
+void *debug_realloc(void *ptr, size_t size, char *file, int line);
+#define calloc(x, y) debug_calloc(x, y, __FILE__, __LINE__ )
+void *debug_calloc(size_t num, size_t size, char *file, int line);
+#endif
+
+
+#endif
diff --git a/src/memory.c b/src/memory.c
new file mode 100644
index 0000000..c872d4a
--- /dev/null
+++ b/src/memory.c
@@ -0,0 +1,156 @@
+//
+// memory.c
+// mobi
+//
+// Created by Bartek on 31.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+// NOTE(review): the header name of the next include was lost when this patch
+// was extracted; <stdlib.h> is inferred from the calloc/free calls below -
+// confirm against the repository.
+#include <stdlib.h>
+#include "memory.h"
+
+// Allocate a zeroed MOBIData with use_kf8 set to MOBI_USE_KF8 and every
+// header/record pointer NULL. Returns NULL if the allocation fails.
+// Release the result with mobi_free().
+MOBIData * mobi_init() {
+    MOBIData *m = NULL;
+    m = calloc(1, sizeof(MOBIData));
+    if (m == NULL) return NULL;
+    m->use_kf8 = MOBI_USE_KF8;
+    m->ph = NULL;
+    m->rh = NULL;
+    m->mh = NULL;
+    m->eh = NULL;
+    m->rec = NULL;
+    m->next = NULL;
+    return m;
+}
+
+// Free the MOBI header of m together with every separately allocated field,
+// then set m->mh to NULL. Does nothing when m or m->mh is NULL.
+void mobi_free_mh(MOBIData *m) {
+    if (m == NULL || m->mh == NULL) {
+        return;
+    }
+    // every field below is an individually allocated value; fields that were
+    // never read are NULL (the header is calloc'ed) and free(NULL) is a no-op
+    free(m->mh->header_length);
+    free(m->mh->mobi_type);
+    free(m->mh->text_encoding);
+    free(m->mh->uid);
+    free(m->mh->file_version);
+    free(m->mh->orth_index);
+    free(m->mh->infl_index);
+    free(m->mh->names_index);
+    free(m->mh->keys_index);
+    free(m->mh->extra0_index);
+    free(m->mh->extra1_index);
+    free(m->mh->extra2_index);
+    free(m->mh->extra3_index);
+    free(m->mh->extra4_index);
+    free(m->mh->extra5_index);
+    free(m->mh->non_text_index);
+    free(m->mh->full_name_offset);
+    free(m->mh->full_name_length);
+    free(m->mh->locale);
+    free(m->mh->input_lang);
+    free(m->mh->output_lang);
+    free(m->mh->min_version);
+    free(m->mh->image_index);
+    free(m->mh->huff_rec_index);
+    free(m->mh->huff_rec_count);
+    free(m->mh->huff_table_offset);
+    free(m->mh->huff_table_length);
+    free(m->mh->exth_flags);
+    free(m->mh->unknown6);
+    free(m->mh->drm_offset);
+    free(m->mh->drm_count);
+    free(m->mh->drm_size);
+    free(m->mh->drm_flags);
+    free(m->mh->first_text_index);
+    free(m->mh->last_text_index);
+    free(m->mh->unknown9);
+    free(m->mh->fcis_index);
+    free(m->mh->fcis_count);
+    free(m->mh->flis_index);
+    free(m->mh->flis_count);
+    free(m->mh->unknown10);
+    free(m->mh->unknown11);
+    free(m->mh->srcs_index);
+    free(m->mh->srcs_count);
+    free(m->mh->unknown12);
+    free(m->mh->unknown13);
+    free(m->mh->extra_flags);
+    free(m->mh->ncx_index);
+    free(m->mh->unknown14);
+    free(m->mh->unknown15);
+    free(m->mh->datp_index);
+    free(m->mh->unknown16);
+    free(m->mh->unknown17);
+    free(m->mh->unknown18);
+    free(m->mh->unknown19);
+    free(m->mh->unknown20);
+    free(m->mh);
+    m->mh = NULL;
+}
+
+// Free the linked list of PDB records (nodes and their data) and set m->rec
+// to NULL. Does nothing when m is NULL.
+void mobi_free_rec(MOBIData *m) {
+    if (m == NULL) {
+        return;
+    }
+    MOBIPdbRecord *curr, *tmp;
+    curr = m->rec;
+    while (curr != NULL) {
+        tmp = curr;
+        // read the link before the node is released
+        curr = curr->next;
+        free(tmp->data);
+        free(tmp);
+    }
+    m->rec = NULL;
+}
+
+// Free the linked list of EXTH records (nodes and their data) and set m->eh
+// to NULL. Does nothing when m is NULL.
+void mobi_free_eh(MOBIData *m) {
+    if (m == NULL) {
+        return;
+    }
+    MOBIExtHeader *curr, *tmp;
+    curr = m->eh;
+    while (curr != NULL) {
+        tmp = curr;
+        curr = curr->next;
+        free(tmp->data);
+        free(tmp);
+    }
+    m->eh = NULL;
+}
+
+// Free m and everything it owns. Passing NULL is allowed.
+// For the linked structure m->next only the MOBI header, the EXTH list, the
+// record 0 header and the structure itself are freed; its ph and rec pointers
+// are not freed here.
+void mobi_free(MOBIData *m) {
+    if (m == NULL) {
+        return;
+    }
+    mobi_free_mh(m);
+    mobi_free_eh(m);
+    mobi_free_rec(m);
+    free(m->ph);
+    free(m->rh);
+    if (m->next) {
+        mobi_free_mh(m->next);
+        mobi_free_eh(m->next);
+        free(m->next->rh);
+        free(m->next);
+        m->next = NULL;
+    }
+    free(m);
+}
+
+// Allocate a zeroed MOBIHuffCdic and fill it with mobi_parse_huffdic(m, ...).
+// Returns NULL if the allocation fails or parsing returns MOBI_ERROR.
+// Release the result with mobi_free_huffcdic().
+MOBIHuffCdic * mobi_init_huffcdic(MOBIData *m) {
+    MOBIHuffCdic *huffcdic;
+    int ret;
+    huffcdic = calloc(1, sizeof(MOBIHuffCdic));
+    if (huffcdic == NULL) {
+        printf("Memory allocation for huffcdic structure failed\n");
+        return NULL;
+    }
+    ret = mobi_parse_huffdic(m, huffcdic);
+    if (ret == MOBI_ERROR) {
+        free(huffcdic);
+        return NULL;
+    }
+    return huffcdic;
+}
+
+// Free the symbol_offsets array, the symbols pointer array and the structure
+// itself. Passing NULL is allowed, so the result of a failed
+// mobi_init_huffcdic() can be handed over safely.
+void mobi_free_huffcdic(MOBIHuffCdic *huffcdic) {
+    if (huffcdic == NULL) {
+        return;
+    }
+    free(huffcdic->symbol_offsets);
+    free(huffcdic->symbols);
+    free(huffcdic);
+}
diff --git a/src/memory.h b/src/memory.h
new file mode 100644
index 0000000..6477bab
--- /dev/null
+++ b/src/memory.h
@@ -0,0 +1,23 @@
+//
+// memory.h
+// mobi
+//
+// Created by Bartek on 31.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_memory_h
+#define mobi_memory_h
+
+#include "mobi.h"
+
+MOBIData * mobi_init();
+void mobi_free_mh(MOBIData *m);
+void mobi_free_rec(MOBIData *m);
+void mobi_free_eh(MOBIData *m);
+void mobi_free(MOBIData *m);
+
+MOBIHuffCdic * mobi_init_huffcdic(MOBIData *m);
+void mobi_free_huffcdic(MOBIHuffCdic *huffcdic);
+
+#endif
diff --git a/src/mobi.h b/src/mobi.h
new file mode 100644
index 0000000..3cd7bdc
--- /dev/null
+++ b/src/mobi.h
@@ -0,0 +1,253 @@
+//
+// mobi.h
+// libmobi
+//
+// Created by Bartek on 24.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved. +// + +#ifndef libmobi_mobi_h +#define libmobi_mobi_h + +#include +#include "buffer.h" +#include "compression.h" +#include "debug.h" + +#define MOBI_ERROR -1 +#define MOBI_SUCCESS 0 + +#define MOBI_USE_KF8 1 +#define MOBI_USE_KF7 0 + +#define EPOCH_MAC_DIFF 2082844800 +#define PALMDB_HEADER_LEN 78 +#define PALMDB_NAME_SIZE_MAX 32 +#define PALMDB_ATTRIBUTE_DEFAULT 0 +#define PALMDB_VERSION_DEFAULT 0 +#define PALMDB_MODNUM_DEFAULT 0 +#define PALMDB_APPINFO_DEFAULT 0 +#define PALMDB_SORTINFO_DEFAULT 0 +#define PALMDB_TYPE_DEFAULT "BOOK" +#define PALMDB_CREATOR_DEFAULT "MOBI" +#define PALMDB_NEXTREC_DEFAULT 0 + +#define RECORD0_HEADER_LEN 16 +#define RECORD0_NO_COMPRESSION 1 +#define RECORD0_PALMDOC_COMPRESSION 2 +#define RECORD0_HUFF_COMPRESSION 17480 +#define RECORD0_RECORD_SIZE_MAX 4096 +#define RECORD0_NO_ENCRYPTION 0 +#define RECORD0_OLD_ENCRYPTION 1 +#define RECORD0_MOBI_ENCRYPTION 2 + +#define PDB_RECORD_INFO_SIZE 8 + +#define MOBI_MAGIC "MOBI" +#define EXTH_MAGIC "EXTH" +#define HUFF_MAGIC "HUFF" +#define CDIC_MAGIC "CDIC" + +#define CDIC_HEADER_LEN 16 +#define HUFF_HEADER_LEN 24 +#define HUFF_RECORD_MINSIZE 2584 + +// EXTH +#define DRM_SERVER_ID 1 +#define DRM_COMMERCE_ID 2 +#define DRM_EBOOKBASE_BOOK_ID 3 + +#define MOBI_EXTH_AUTHOR 100 // +#define MOBI_EXTH_PUBLISHER 101 // +#define MOBI_EXTH_IMPRINT 102 // +#define MOBI_EXTH_DESCRIPTION 103 // +#define MOBI_EXTH_ISBN 104 // +#define MOBI_EXTH_SUBJECT 105 // +#define MOBI_EXTH_PUBLISHINGDATE 106 // +#define MOBI_EXTH_REVIEW 107 // +#define MOBI_EXTH_CONTRIBUTOR 108 // +#define MOBI_EXTH_RIGHTS 109 // +#define MOBI_EXTH_SUBJECTCODE 110 // +#define MOBI_EXTH_TYPE 111 // +#define MOBI_EXTH_SOURCE 112 // +#define MOBI_EXTH_ASIN 113 +#define MOBI_EXTH_VERSION 114 +#define MOBI_EXTH_SAMPLE 115 +#define MOBI_EXTH_STARTREADING 116 +#define MOBI_EXTH_ADULT 117 // +#define MOBI_EXTH_PRICE 118 // +#define MOBI_EXTH_PRICECURRENCY 119 // +#define MOBI_EXTH_KF8BOUNDARY 
121 +#define MOBI_EXTH_COUNTRESOURCES 125 +#define MOBI_EXTH_KF8OVERURI 129 + +#define MOBI_EXTH_DICTNAME 200 // +#define MOBI_EXTH_COVEROFFSET 201 // +#define MOBI_EXTH_THUMBOFFSET 202 +#define MOBI_EXTH_HASFAKECOVER 203 +#define MOBI_EXTH_CREATORSOFT 204 +#define MOBI_EXTH_CREATORMAJOR 205 +#define MOBI_EXTH_CREATORMINOR 206 +#define MOBI_EXTH_CREATORBUILD 207 +#define MOBI_EXTH_WATERMARK 208 +#define MOBI_EXTH_TAMPERKEYS 209 + +#define MOBI_EXTH_FONTSIGNATURE 300 + +#define MOBI_EXTH_CLIPPINGLIMIT 401 +#define MOBI_EXTH_PUBLISHERLIMIT 402 +#define MOBI_EXTH_TTS 404 +#define MOBI_EXTH_RENAL 405 +#define MOBI_EXTH_RENALEXPIRE 406 + +#define MOBI_EXTH_CDETYPE 501 +#define MOBI_EXTH_LASTUPDATE 502 +#define MOBI_EXTH_UPDATEDTITLE 503 +#define MOBI_EXTH_LANGUAGE 524 // +#define MOBI_EXTH_ALIGNMENT 525 +#define MOBI_EXTH_CREATORBUILD2 535 + + + + + +typedef struct { + char name[PALMDB_NAME_SIZE_MAX + 1]; // zero terminated, trimmed title+author + uint16_t attributes; // PALMDB_ATTRIBUTE_DEFAULT + uint16_t version; // PALMDB_VERSION_DEFAULT + uint32_t ctime; // creation time + uint32_t mtime; // modification time + uint32_t btime; // backup time + uint32_t mod_num; // PALMDB_MODNUM_DEFAULT + uint32_t appinfo_offset; // PALMDB_APPINFO_DEFAULT + uint32_t sortinfo_offset; // PALMDB_SORTINFO_DEFAULT + char type[5]; // PALMDB_TYPE_DEFAULT + char creator[5]; // PALMDB_CREATOR_DEFAULT + uint32_t uid; // used internally to identify record + uint32_t next_rec; // PALMDB_NEXTREC_DEFAULT + uint16_t rec_count; // number of records in the file +} MOBIPdbHeader; + + + +typedef struct pdb_record { + size_t offset; + size_t size; + uint8_t attributes; + uint32_t uid; + char *data; + struct pdb_record *next; +} MOBIPdbRecord; + +typedef struct exth { + int uid; + size_t size; + void *data; + struct exth *next; +} MOBIExtHeader; + +typedef struct { + // PalmDOC header (extended), offset 0, length 16 + uint16_t compression_type; // 0; 1 == no compression, 2 = PalmDOC compression, 17480 = 
HUFF/CDIC compression + //uint16_t unused; // 2; 0 + uint32_t text_length; // 4; uncompressed length of the entire text of the book + uint16_t text_record_count; // 8; number of PDB records used for the text of the book + uint16_t text_record_size; // 10; maximum size of each record containing text, always 4096 + uint16_t encryption_type; // 12; 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption + uint16_t unknown1; // 14; usually 0 +} MOBIRecord0Header; + +typedef struct { + // MOBI header, offset 16 + char mobi_magic[5]; // 16: M O B I { 77, 79, 66, 73 } + uint32_t *header_length; // 20: the length of the MOBI header, including the previous 4 bytes + uint32_t *mobi_type; // 24: mobipocket file type + uint32_t *text_encoding; // 28: 1252 = CP1252, 65001 = UTF-8 + uint32_t *uid; // 32: unique id + uint32_t *file_version; // 36: mobipocket format + uint32_t *orth_index; // 40: section number of orthographic meta index. 0xFFFFFFFF if index is not available. + uint32_t *infl_index; // 44: section number of inflection meta index. 0xFFFFFFFF if index is not available. + uint32_t *names_index; // 48: section number of names meta index. 0xFFFFFFFF if index is not available. + uint32_t *keys_index; // 52: section number of keys meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra0_index; // 56: section number of extra 0 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra1_index; // 60: section number of extra 1 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra2_index; // 64: section number of extra 2 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra3_index; // 68: section number of extra 3 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra4_index; // 72: section number of extra 4 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra5_index; // 76: section number of extra 5 meta index. 0xFFFFFFFF if index is not available. 
+ uint32_t *non_text_index; // 80: first record number (starting with 0) that's not the book's text + uint32_t *full_name_offset; // 84: offset in record 0 (not from start of file) of the full name of the book + uint32_t *full_name_length; // 88: + uint32_t *locale; // 92: low byte is main language 09= English, next byte is dialect, 08 = British, 04 = US + uint32_t *input_lang; // 96: input language for a dictionary + uint32_t *output_lang; // 100: output language for a dictionary + uint32_t *min_version; // 104: minimum mobipocket version support needed to read this file. + uint32_t *image_index; // 108: first record number (starting with 0) that contains an image (sequential) + uint32_t *huff_rec_index; // 112: first huffman compression record. + uint32_t *huff_rec_count; // 116: + uint32_t *huff_table_offset; // 120: + uint32_t *huff_table_length; // 124: + uint32_t *exth_flags; // 128: bitfield. if bit 6 (0x40) is set, then there's an EXTH record + // 32 unknown bytes 0? + // unknown2 + // unknown3 + // unknown4 + // unknown5 + uint32_t *unknown6; // 164: use 0xFFFFFFFF + uint32_t *drm_offset; // 168: offset to DRM key info in DRMed files. 0xFFFFFFFF if no DRM + uint32_t *drm_count; // 172: number of entries in DRM info + uint32_t *drm_size; // 176: number of bytes in DRM info + uint32_t *drm_flags; // 180: some flags concerning the DRM info + // 8 unknown bytes 0? 
+ // unknown7 + // unknown8 + uint16_t *first_text_index; // 192: + uint16_t *last_text_index; // 194: + uint32_t *unknown9; // 196: + uint32_t *fcis_index; // 200: + uint32_t *fcis_count; // 204: + uint32_t *flis_index; // 208: + uint32_t *flis_count; // 212: + uint32_t *unknown10; // 216: + uint32_t *unknown11; // 220: + uint32_t *srcs_index; // 224: + uint32_t *srcs_count; // 228: + uint32_t *unknown12; // 232: + uint32_t *unknown13; // 236: + // uint16_t fill 0 + uint16_t *extra_flags; // 242: + uint32_t *ncx_index; // 244: + uint32_t *unknown14; // 248: + uint32_t *unknown15; // 252: + uint32_t *datp_index; // 256: + uint32_t *unknown16; // 260: + uint32_t *unknown17; // 264: + uint32_t *unknown18; // 268: + uint32_t *unknown19; // 272: + uint32_t *unknown20; // 276: +} MOBIMobiHeader; + +typedef struct m { + uint8_t use_kf8; + MOBIPdbHeader *ph; + MOBIRecord0Header *rh; + MOBIMobiHeader *mh; + MOBIExtHeader *eh; + MOBIPdbRecord *rec; + struct m *next; +} MOBIData; + +void write_mobi(void); +int mobi_load_file(MOBIData *m, FILE *file); +int mobi_load_filename(MOBIData *m, const char *path); +MOBIData * mobi_init(); +void mobi_free(MOBIData *m); + +int mobi_parse_huffdic(MOBIData *m, MOBIHuffCdic *cdic); +MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid); +MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t uid); +int mobi_get_rawml(MOBIData *m, char *text, size_t len); +int mobi_dump_rawml(MOBIData *m, FILE *file); +void mobi_get_fullname(MOBIData *m, char *fullname, size_t len); +int mobi_get_kf8boundary(MOBIData *m); +#endif diff --git a/src/read.c b/src/read.c new file mode 100644 index 0000000..761cca5 --- /dev/null +++ b/src/read.c @@ -0,0 +1,557 @@ +// +// read.c +// mobi +// +// Created by Bartek on 26.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+//
+
+// NOTE(review): the header names of the next two includes were lost when this
+// patch was extracted; <stdlib.h> (malloc/calloc/realloc) and <string.h>
+// (strcmp/strncmp) are inferred from usage - confirm against the repository.
+#include <stdlib.h>
+#include <string.h>
+#include "read.h"
+
+// Read the PALMDB_HEADER_LEN byte Palm database header from the current
+// position of file into a newly allocated m->ph.
+// Returns MOBI_SUCCESS, or MOBI_ERROR if m or file is NULL, the read is
+// short, or an allocation fails.
+int mobi_load_pdbheader(MOBIData *m, FILE *file) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (!file) {
+        return MOBI_ERROR;
+    }
+    MOBIBuffer *buf = buffer_init(PALMDB_HEADER_LEN);
+    if (buf == NULL) {
+        return MOBI_ERROR;
+    }
+    size_t len = fread(buf->data, 1, PALMDB_HEADER_LEN, file);
+    if (len != PALMDB_HEADER_LEN) {
+        buffer_free(buf);
+        return MOBI_ERROR;
+    }
+    m->ph = calloc(1, sizeof(MOBIPdbHeader));
+    if (m->ph == NULL) {
+        printf("Memory allocation for pdb header failed\n");
+        // the temporary buffer was leaked on this path before
+        buffer_free(buf);
+        return MOBI_ERROR;
+    }
+    // parse header
+    buffer_getstring(m->ph->name, buf, PALMDB_NAME_SIZE_MAX);
+    m->ph->name[PALMDB_NAME_SIZE_MAX] = '\0';
+    m->ph->attributes = buffer_get16(buf);
+    m->ph->version = buffer_get16(buf);
+    m->ph->ctime = buffer_get32(buf);
+    m->ph->mtime = buffer_get32(buf);
+    m->ph->btime = buffer_get32(buf);
+    m->ph->mod_num = buffer_get32(buf);
+    m->ph->appinfo_offset = buffer_get32(buf);
+    m->ph->sortinfo_offset = buffer_get32(buf);
+    buffer_getstring(m->ph->type, buf, 4);
+    m->ph->type[4] = '\0';
+    buffer_getstring(m->ph->creator, buf, 4);
+    m->ph->creator[4] = '\0';
+    m->ph->uid = buffer_get32(buf);
+    m->ph->next_rec = buffer_get32(buf);
+    m->ph->rec_count = buffer_get16(buf);
+    buffer_free(buf);
+    return MOBI_SUCCESS;
+}
+
+// Read m->ph->rec_count record info entries (PDB_RECORD_INFO_SIZE bytes each)
+// from the current position of file and build the m->rec linked list with
+// each record's offset, attributes and uid.
+// Returns MOBI_SUCCESS or MOBI_ERROR. On error the nodes created so far stay
+// attached to m->rec.
+int mobi_load_reclist(MOBIData *m, FILE *file) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (!file) {
+        printf("File not ready\n");
+        return MOBI_ERROR;
+    }
+    if (m->ph == NULL) {
+        // rec_count is read from the PDB header below
+        printf("PDB header not loaded\n");
+        return MOBI_ERROR;
+    }
+    m->rec = calloc(1, sizeof(MOBIPdbRecord));
+    if (m->rec == NULL) {
+        printf("Memory allocation for pdb record failed\n");
+        return MOBI_ERROR;
+    }
+    MOBIPdbRecord *curr = m->rec;
+    for (int i = 0; i < m->ph->rec_count; i++) {
+        MOBIBuffer *buf = buffer_init(PDB_RECORD_INFO_SIZE);
+        if (buf == NULL) {
+            return MOBI_ERROR;
+        }
+        size_t len = fread(buf->data, 1, PDB_RECORD_INFO_SIZE, file);
+        if (len != PDB_RECORD_INFO_SIZE) {
+            buffer_free(buf);
+            return MOBI_ERROR;
+        }
+        // the first entry reuses the node allocated above
+        if (i > 0) {
+            curr->next = calloc(1, sizeof(MOBIPdbRecord));
+            if (curr->next == NULL) {
+                printf("Memory allocation for pdb record failed\n");
+                // the temporary buffer was leaked on this path before
+                buffer_free(buf);
+                return MOBI_ERROR;
+            }
+            curr = curr->next;
+        }
+        curr->offset = buffer_get32(buf);
+        curr->attributes = buffer_get8(buf);
+        // the uid is stored in 3 bytes: one high byte followed by a 16-bit word
+        uint8_t h = buffer_get8(buf);
+        uint16_t l = buffer_get16(buf);
+        curr->uid = h << 16 | l;
+        curr->next = NULL;
+        buffer_free(buf);
+    }
+    return MOBI_SUCCESS;
+}
+
+// Load the data of every record in m->rec from file. A record's size is the
+// distance to the next record's offset, or to the end of the file for the
+// last record.
+// Returns MOBI_SUCCESS, or MOBI_ERROR after freeing the whole record list
+// when a record cannot be loaded.
+int mobi_load_recdata(MOBIData *m, FILE *file) {
+    MOBIPdbRecord *curr, *next;
+    int ret;
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (!file) {
+        printf("File not ready\n");
+        return MOBI_ERROR;
+    }
+    curr = m->rec;
+    while (curr != NULL) {
+        size_t end_offset;
+        ret = MOBI_SUCCESS;
+        next = curr->next;
+        if (next != NULL) {
+            end_offset = next->offset;
+        } else {
+            // last record: it extends to the end of the file
+            long file_size = -1;
+            if (fseek(file, 0, SEEK_END) == 0) {
+                file_size = ftell(file);
+            }
+            if (file_size < 0) {
+                ret = MOBI_ERROR;
+            }
+            end_offset = (ret == MOBI_ERROR) ? 0 : (size_t) file_size;
+        }
+        // an end before the start would make the unsigned size wrap around
+        if (ret != MOBI_ERROR && end_offset < curr->offset) {
+            ret = MOBI_ERROR;
+        }
+        if (ret != MOBI_ERROR) {
+            curr->size = end_offset - curr->offset;
+            ret = mobi_load_rec(curr, file);
+        }
+        if (ret == MOBI_ERROR) {
+            printf("Error loading record uid %i data\n", curr->uid);
+            mobi_free_rec(m);
+            return MOBI_ERROR;
+        }
+        curr = next;
+    }
+    return MOBI_SUCCESS;
+}
+
+// Read rec->size bytes at rec->offset of file into a newly allocated
+// rec->data. When the file holds fewer bytes, rec->size is reduced to the
+// number of bytes actually read.
+// Returns MOBI_SUCCESS, or MOBI_ERROR if seeking or the allocation fails.
+int mobi_load_rec(MOBIPdbRecord *rec, FILE *file) {
+    size_t len;
+    int ret;
+    ret = fseek(file, rec->offset, SEEK_SET);
+    if (ret != 0) {
+        printf("Record %i not found\n", rec->uid);
+        return MOBI_ERROR;
+    }
+    rec->data = malloc(rec->size);
+    if (rec->data == NULL) {
+        printf("Memory allocation for pdb record data failed\n");
+        return MOBI_ERROR;
+    }
+    len = fread(rec->data, 1, rec->size, file);
+    if (len < rec->size) {
+        printf("Truncated data in record %i\n", rec->uid);
+        rec->size = len;
+        // shrink only to a non-zero size: realloc(ptr, 0) may free the block
+        // and return NULL, which would leave rec->data dangling
+        if (len > 0) {
+            char *ptr = realloc(rec->data, len);
+            if (ptr) {
+                rec->data = ptr;
+            }
+        }
+    }
+    return MOBI_SUCCESS;
+}
+
+// Parse the EXTH header at the cursor of buf into the m->eh linked list.
+// Each list node gets the record uid, the data size (record size minus the 8
+// bytes used by uid and size) and a copy of the data.
+// Returns MOBI_SUCCESS, or MOBI_ERROR if the magic does not match, the
+// declared lengths are inconsistent, or an allocation fails; in the latter
+// cases the list built so far is freed. buf->maxlen is restored before
+// returning.
+int mobi_parse_extheader(MOBIData *m, MOBIBuffer *buf) {
+    size_t saved_maxlen;
+    char exth_magic[4];
+    size_t exth_length;
+    size_t rec_count;
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    buffer_getstring(exth_magic, buf, 4);
+    exth_length = buffer_get32(buf);
+    rec_count = buffer_get32(buf);
+    if (strncmp(exth_magic, EXTH_MAGIC, 4) != 0 ||
+        exth_length + buf->offset + 8 > buf->maxlen ||
+        rec_count == 0) {
+        return MOBI_ERROR;
+    }
+    // limit reads to the declared EXTH length while parsing the records
+    saved_maxlen = buf->maxlen;
+    buf->maxlen = exth_length + buf->offset - 8;
+    m->eh = calloc(1, sizeof(MOBIExtHeader));
+    if (m->eh == NULL) {
+        printf("Memory allocation for EXTH header failed\n");
+        buf->maxlen = saved_maxlen;
+        return MOBI_ERROR;
+    }
+    MOBIExtHeader *curr = m->eh;
+    for (size_t i = 0; i < rec_count; i++) {
+        // the first record reuses the node allocated above
+        if (i > 0) {
+            curr->next = calloc(1, sizeof(MOBIExtHeader));
+            if (curr->next == NULL) {
+                printf("Memory allocation for EXTH header failed\n");
+                mobi_free_eh(m);
+                buf->maxlen = saved_maxlen;
+                return MOBI_ERROR;
+            }
+            curr = curr->next;
+        }
+        curr->uid = buffer_get32(buf);
+        // data size = record size minus 8 bytes for uid and size
+        uint32_t rec_size = buffer_get32(buf);
+        if (rec_size < 8) {
+            // the subtraction below would wrap around to a huge size
+            printf("EXTH record %i has invalid size\n", curr->uid);
+            mobi_free_eh(m);
+            buf->maxlen = saved_maxlen;
+            return MOBI_ERROR;
+        }
+        curr->size = rec_size - 8;
+        if (curr->size == 0) {
+            printf("Skip record %i, data too short\n", curr->uid);
+            continue;
+        }
+        if (buf->offset + curr->size > buf->maxlen) {
+            // buffer_getraw() would copy nothing and leave the data uninitialised
+            printf("EXTH record %i exceeds header length\n", curr->uid);
+            mobi_free_eh(m);
+            buf->maxlen = saved_maxlen;
+            return MOBI_ERROR;
+        }
+        curr->data = malloc(curr->size);
+        if (curr->data == NULL) {
+            printf("Memory allocation for EXTH record %i failed\n", curr->uid);
+            mobi_free_eh(m);
+            buf->maxlen = saved_maxlen;
+            return MOBI_ERROR;
+        }
+        buffer_getraw(curr->data, buf, curr->size);
+        curr->next = NULL;
+    }
+    buf->maxlen = saved_maxlen;
+    return MOBI_SUCCESS;
+}
+
+int mobi_parse_mobiheader(MOBIData *m, MOBIBuffer *buf) {
+    size_t saved_maxlen;
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    m->mh = calloc(1, sizeof(MOBIMobiHeader));
+    if (m->mh == NULL) {
+        printf("Memory allocation for MOBI header failed\n");
+        return MOBI_ERROR;
+    }
+    buffer_getstring(m->mh->mobi_magic, buf, 4);
+    m->mh->mobi_magic[4] = '\0';
+    buffer_copy32(&m->mh->header_length, buf);
+    if (strcmp(m->mh->mobi_magic, MOBI_MAGIC) != 0 || m->mh->header_length == NULL) {
+        printf("MOBI header not found\n");
+        mobi_free_mh(m);
+        return MOBI_ERROR;
+    }
+    saved_maxlen = buf->maxlen;
+    // read only declared MOBI header length (curr offset minus 8
already read bytes) + buf->maxlen = *m->mh->header_length + buf->offset - 8; + buffer_copy32(&m->mh->mobi_type, buf); + buffer_copy32(&m->mh->text_encoding, buf); + buffer_copy32(&m->mh->uid, buf); + buffer_copy32(&m->mh->file_version, buf); + buffer_copy32(&m->mh->orth_index, buf); + buffer_copy32(&m->mh->infl_index, buf); + buffer_copy32(&m->mh->names_index, buf); + buffer_copy32(&m->mh->keys_index, buf); + buffer_copy32(&m->mh->extra0_index, buf); + buffer_copy32(&m->mh->extra1_index, buf); + buffer_copy32(&m->mh->extra2_index, buf); + buffer_copy32(&m->mh->extra3_index, buf); + buffer_copy32(&m->mh->extra4_index, buf); + buffer_copy32(&m->mh->extra5_index, buf); + buffer_copy32(&m->mh->non_text_index, buf); + buffer_copy32(&m->mh->full_name_offset, buf); + buffer_copy32(&m->mh->full_name_length, buf); + buffer_copy32(&m->mh->locale, buf); + buffer_copy32(&m->mh->input_lang, buf); + buffer_copy32(&m->mh->output_lang, buf); + buffer_copy32(&m->mh->min_version, buf); + buffer_copy32(&m->mh->image_index, buf); + buffer_copy32(&m->mh->huff_rec_index, buf); + buffer_copy32(&m->mh->huff_rec_count, buf); + buffer_copy32(&m->mh->huff_table_offset, buf); + buffer_copy32(&m->mh->huff_table_length, buf); + buffer_copy32(&m->mh->exth_flags, buf); + buf->offset += 32; // 32 unknown bytes + buffer_copy32(&m->mh->unknown6, buf); + buffer_copy32(&m->mh->drm_offset, buf); + buffer_copy32(&m->mh->drm_count, buf); + buffer_copy32(&m->mh->drm_size, buf); + buffer_copy32(&m->mh->drm_flags, buf); + buf->offset += 8; // 8 unknown bytes + buffer_copy16(&m->mh->first_text_index, buf); + buffer_copy16(&m->mh->last_text_index, buf); + buffer_copy32(&m->mh->unknown9, buf); + buffer_copy32(&m->mh->fcis_index, buf); + buffer_copy32(&m->mh->fcis_count, buf); + buffer_copy32(&m->mh->flis_index, buf); + buffer_copy32(&m->mh->flis_count, buf); + buffer_copy32(&m->mh->unknown10, buf); + buffer_copy32(&m->mh->unknown11, buf); + buffer_copy32(&m->mh->srcs_index, buf); + 
buffer_copy32(&m->mh->srcs_count, buf); + buffer_copy32(&m->mh->unknown12, buf); + buffer_copy32(&m->mh->unknown13, buf); + buf->offset += 2; // 2 byte fill + buffer_copy16(&m->mh->extra_flags, buf); + buffer_copy32(&m->mh->ncx_index, buf); + buffer_copy32(&m->mh->unknown14, buf); + buffer_copy32(&m->mh->unknown15, buf); + buffer_copy32(&m->mh->datp_index, buf); + buffer_copy32(&m->mh->unknown16, buf); + buffer_copy32(&m->mh->unknown17, buf); + buffer_copy32(&m->mh->unknown18, buf); + buffer_copy32(&m->mh->unknown19, buf); + buffer_copy32(&m->mh->unknown20, buf); + if (buf->maxlen > buf->offset) { + buf->offset = buf->maxlen; + } + buf->maxlen = saved_maxlen; + return MOBI_SUCCESS; +} + + +// parse +int mobi_parse_record0(MOBIData *m, size_t seqnumber) { + MOBIBuffer *buf; + MOBIPdbRecord *record0; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + record0 = mobi_get_record_by_seqnumber(m, seqnumber); + if (record0 == NULL || record0->size == 0) { + printf("Record 0 not loaded\n"); + return MOBI_ERROR; + } + buf = buffer_init(record0->size); + if (buf == NULL) { + return MOBI_ERROR; + } + memcpy(buf->data, record0->data, record0->size); + m->rh = calloc(1, sizeof(MOBIRecord0Header)); + if (m->rh == NULL) { + printf("Memory allocation for record 0 header failed\n"); + return MOBI_ERROR; + } + // parse palmdoc header + m->rh->compression_type = buffer_get16(buf); + buf->offset += 2; // unused, 0 + m->rh->text_length = buffer_get32(buf); + m->rh->text_record_count = buffer_get16(buf); + m->rh->text_record_size = buffer_get16(buf); + m->rh->encryption_type = buffer_get16(buf); + m->rh->unknown1 = buffer_get16(buf); + if (strcmp(m->ph->type, "BOOK") == 0 && strcmp(m->ph->creator, "MOBI") == 0) { + // parse mobi header + mobi_parse_mobiheader(m, buf); + // parse exth header + mobi_parse_extheader(m, buf); + } + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_parse_huff(MOBIHuffCdic *huffcdic, MOBIPdbRecord *record) { + 
MOBIBuffer *buf; + char huff_magic[5]; + size_t header_length; + buf = buffer_init(record->size); + if (buf == NULL) { + return MOBI_ERROR; + } + memcpy(buf->data, record->data, record->size); + buffer_getstring(huff_magic, buf, 4); + header_length = buffer_get32(buf); + if (strncmp(huff_magic, HUFF_MAGIC, 4) != 0 || header_length < HUFF_HEADER_LEN) { + printf("HUFF wrong magic: %s\n", huff_magic); + buffer_free(buf); + return MOBI_ERROR; + } + size_t data1_offset = buffer_get32(buf); + size_t data2_offset = buffer_get32(buf); + // skip little-endian table offsets + buf->offset = data1_offset; + if (buf->offset + (256 * 4) > buf->maxlen) { + printf("HUFF data1 too short\n"); + buffer_free(buf); + return MOBI_ERROR; + } + // read 256 indices from data1 big-endian + for (int i = 0; i < 256; i++) { + huffcdic->table1[i] = buffer_get32(buf); + } + buf->offset = data2_offset; + if (buf->offset + (64 * 4) > buf->maxlen) { + printf("HUFF data2 too short\n"); + buffer_free(buf); + return MOBI_ERROR; + } + // read 32 mincode-maxcode pairs from data2 big-endian + uint32_t mincode, maxcode; + huffcdic->mincode_table[0] = 0; + huffcdic->maxcode_table[0] = 0xFFFFFFFF; + for (int i = 1; i < 33; i++) { + mincode = buffer_get32(buf); + maxcode = buffer_get32(buf); + huffcdic->mincode_table[i] = mincode << (32 - i); + huffcdic->maxcode_table[i] = ((maxcode + 1) << (32 - i)) - 1; + } + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_parse_cdic(MOBIHuffCdic *huffcdic, MOBIPdbRecord *record, int num) { + MOBIBuffer *buf; + char cdic_magic[5]; + size_t header_length, index_count, code_length; + buf = buffer_init(record->size); + if (buf == NULL) { + return MOBI_ERROR; + } + memcpy(buf->data, record->data, record->size); + buffer_getstring(cdic_magic, buf, 4); + header_length = buffer_get32(buf); + if (strncmp(cdic_magic, CDIC_MAGIC, 4) != 0 || header_length < CDIC_HEADER_LEN) { + printf("CDIC wrong magic: %s\n", cdic_magic); + buffer_free(buf); + return MOBI_ERROR; + } + // 
variables in huffcdic initialized to zero with calloc + // save initial count and length + index_count = buffer_get32(buf); + code_length = buffer_get32(buf); + if (huffcdic->code_length && huffcdic->code_length != code_length) { + printf("Warning: CDIC different code length %zu in record %i, previous was %zu\n", huffcdic->code_length, record->uid, code_length); + } + if (huffcdic->index_count && huffcdic->index_count != index_count) { + printf("Warning: CDIC different index count %zu in record %i, previous was %zu\n", huffcdic->index_count, record->uid, index_count); + } + huffcdic->code_length = code_length; + huffcdic->index_count = index_count; + if (index_count == 0) { + printf("CDIC index count is null"); + buffer_free(buf); + return MOBI_ERROR; + } + // allocate memory for symbol offsets if not already allocated + if (num == 0) { + huffcdic->symbol_offsets = malloc(index_count * sizeof(*huffcdic->symbol_offsets)); + if (huffcdic->symbol_offsets == NULL) { + printf("CDIC cannot allocate memory"); + buffer_free(buf); + return MOBI_ERROR; + } + } + index_count -= huffcdic->index_read; + // limit number of records read to code_length bits + if (index_count >> code_length) { + index_count = (1 << code_length); + } + if (buf->offset + (index_count * 2) > buf->maxlen) { + printf("CDIC indices data too short\n"); + buffer_free(buf); + free(huffcdic->symbol_offsets); + return MOBI_ERROR; + } + // read i * 2 byte big-endian indices + while (index_count--) { + huffcdic->symbol_offsets[huffcdic->index_read++] = buffer_get16(buf); + } + if (buf->offset + code_length > buf->maxlen) { + printf("CDIC dictionary data too short"); + free(huffcdic->symbol_offsets); + buffer_free(buf); + return MOBI_ERROR; + } + // copy pointer to data + huffcdic->symbols[num] = record->data + CDIC_HEADER_LEN; + // free buffer + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_parse_huffdic(MOBIData *m, MOBIHuffCdic *huffcdic) { + MOBIPdbRecord *curr; + int ret, i = 0; + if (m->mh == 
NULL || m->mh->huff_rec_index == NULL) { + printf("HUFF/CDIC records metadata not found in MOBI header\n"); + return MOBI_ERROR; + } + size_t huff_rec_index = *m->mh->huff_rec_index; + size_t huff_rec_count = *m->mh->huff_rec_count; + curr = mobi_get_record_by_seqnumber(m, huff_rec_index); + if (curr == NULL) { + printf("HUFF record not found\n"); + return MOBI_ERROR; + } + if (curr->size < HUFF_RECORD_MINSIZE) { + printf("HUFF record too short (%zu b)\n", curr->size); + return MOBI_ERROR; + } + ret = mobi_parse_huff(huffcdic, curr); + if (ret == MOBI_ERROR) { + printf("HUFF parsing failed\n"); + return MOBI_ERROR; + } + //huff_rec_index++; + curr = curr->next; + // allocate memory for symbols data in each CDIC record + huffcdic->symbols = malloc((huff_rec_count - 1) * sizeof(*huffcdic->symbols)); + // get following CDIC records + while (i < huff_rec_count - 1) { + ret = mobi_parse_cdic(huffcdic, curr, i++); + if (ret == MOBI_ERROR) { + printf("CDIC parsing failed\n"); + free(huffcdic->symbols); + return MOBI_ERROR; + } + curr = curr->next; + } + + return MOBI_SUCCESS; +} +int mobi_load_file(MOBIData *m, FILE *file) { + int ret; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + ret = mobi_load_pdbheader(m, file); + + if (strcmp(m->ph->type, "BOOK") != 0 && strcmp(m->ph->type, "TEXt") != 0) { + printf("Unsupported file type: %s\n", m->ph->type); + return MOBI_ERROR; + } + + if (ret == MOBI_ERROR || m->ph->rec_count == 0) { + printf("No records found\n"); + return MOBI_ERROR; + } + ret = mobi_load_reclist(m, file); + if (ret == MOBI_ERROR) { + return MOBI_ERROR; + } + ret = mobi_load_recdata(m, file); + if (ret == MOBI_ERROR) { + return MOBI_ERROR; + } + ret = mobi_parse_record0(m, 0); + // if EXTH is loaded and use_kf8 flag is set parse KF8 record0 for joined mobi7/kf8 file + if (m->eh && m->use_kf8) { + int boundary_rec_number; + boundary_rec_number = mobi_get_kf8boundary(m); + if (boundary_rec_number >= 0) { + // it is a 
joint mobi7/kf8 file + m->next = mobi_init(); + // link pdb header and records data to kf8data structure + m->next->ph = m->ph; + m->next->rec = m->rec; + // close next loop + m->next->next = m; + ret = mobi_parse_record0(m->next, boundary_rec_number + 1); + mobi_swap_mobidata(m); + } + } + return ret; +} + +int mobi_load_filename(MOBIData *m, const char *path) { + FILE *file; + int ret; + file = fopen(path, "rb"); + ret = mobi_load_file(m, file); + fclose(file); + return ret; +} diff --git a/src/read.h b/src/read.h new file mode 100644 index 0000000..5baa6fd --- /dev/null +++ b/src/read.h @@ -0,0 +1,21 @@ +// +// read.h +// mobi +// +// Created by Bartek on 26.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. +// + +#ifndef mobi_read_h +#define mobi_read_h + +#include "mobi.h" +#include "memory.h" +#include "util.h" + +int mobi_load_pdbheader(MOBIData *m, FILE *file); +int mobi_load_reclist(MOBIData *m, FILE *file); +int mobi_load_recdata(MOBIData *m, FILE *file); +int mobi_load_rec(MOBIPdbRecord *rec, FILE *file); + +#endif diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..600e1e8 --- /dev/null +++ b/src/util.c @@ -0,0 +1,331 @@ +// +// util.c +// mobi +// +// Created by Bartek on 08.04.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+//
+
+#include "util.h"
+
+// Copy the document's full name from record 0 into fullname.
+// At most len bytes are written; like strncpy, the result is padded with
+// '\0' up to len bytes and is NOT terminated when the name fills all len
+// bytes, so callers that need a C string must reserve and set byte len
+// themselves.
+// fullname is left empty when the MOBI header or record 0 is not available or
+// the name offset lies outside record 0.
+void mobi_get_fullname(MOBIData *m, char *fullname, size_t len) {
+    if (fullname == NULL) {
+        return;
+    }
+    fullname[0] = '\0';
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return;
+    }
+    MOBIPdbRecord *record0 = mobi_get_record_by_seqnumber(m, 0);
+    if (m->mh == NULL || m->mh->full_name_offset == NULL || record0 == NULL) {
+        return;
+    }
+    const size_t name_offset = *m->mh->full_name_offset;
+    if (record0->data == NULL || name_offset >= record0->size) {
+        return;
+    }
+    // never read past the end of record 0, whatever the header claims
+    size_t available = record0->size - name_offset;
+    if (available > len) {
+        available = len;
+    }
+    const char *name = record0->data + name_offset;
+    size_t i = 0;
+    while (i < available && name[i] != '\0') {
+        fullname[i] = name[i];
+        i++;
+    }
+    while (i < len) {
+        fullname[i++] = '\0';
+    }
+}
+
+// Return the first pdb record with the given uid, or NULL if there is none.
+MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return NULL;
+    }
+    for (MOBIPdbRecord *curr = m->rec; curr != NULL; curr = curr->next) {
+        if (curr->uid == uid) {
+            return curr;
+        }
+    }
+    return NULL;
+}
+
+// Return the pdb record at 0-based position num in the record list,
+// or NULL if the list is shorter.
+MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t num) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return NULL;
+    }
+    size_t i = 0;
+    for (MOBIPdbRecord *curr = m->rec; curr != NULL; curr = curr->next) {
+        if (i++ == num) {
+            return curr;
+        }
+    }
+    return NULL;
+}
+
+// Unlink and free the pdb record at 0-based position num.
+// Returns MOBI_ERROR when m or the record list is missing.
+// NOTE(review): MOBI_SUCCESS is also returned when the list has no record at
+// position num; kept as is because callers may rely on it -- confirm.
+int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (m->rec == NULL) {
+        return MOBI_ERROR;
+    }
+    size_t i = 0;
+    MOBIPdbRecord *prev = NULL;
+    for (MOBIPdbRecord *curr = m->rec; curr != NULL; prev = curr, curr = curr->next) {
+        if (i++ != num) {
+            continue;
+        }
+        if (prev == NULL) {
+            m->rec = curr->next;
+        } else {
+            prev->next = curr->next;
+        }
+        free(curr->data);
+        free(curr);
+        return MOBI_SUCCESS;
+    }
+    return MOBI_SUCCESS;
+}
+
+// Return the first EXTH record with the given uid, or NULL if there is none.
+MOBIExtHeader * mobi_get_exthtag_by_uid(MOBIData *m, size_t uid) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return NULL;
+    }
+    for (MOBIExtHeader *curr = m->eh; curr != NULL; curr = curr->next) {
+        if (curr->uid == uid) {
+            return curr;
+        }
+    }
+    return NULL;
+}
+
+// Decode the size value stored backwards at the end of the first psize bytes
+// of record->data: 7 bits per byte starting with the last byte, stopping at a
+// byte with bit 7 set, after 28 bits, or at the start of the data.
+// Returns 0 when psize is 0 or larger than the record.
+size_t sizeof_trailing_entry(MOBIPdbRecord *record, size_t psize) {
+    if (record == NULL || record->data == NULL || psize == 0 || psize > record->size) {
+        return 0;
+    }
+    size_t bitpos = 0;
+    size_t result = 0;
+    while (1) {
+        const uint8_t v = (uint8_t) record->data[psize - 1];
+        result |= (size_t) (v & 0x7F) << bitpos;
+        bitpos += 7;
+        psize -= 1;
+        if ((v & 0x80) != 0 || (bitpos >= 28) || (psize == 0)) {
+            return result;
+        }
+    }
+}
+
+// Return the number of trailing (non-text) bytes at the end of a text record.
+// Each set bit of flags above bit 0 adds one trailing entry whose size is
+// decoded by sizeof_trailing_entry; bit 0 adds the multibyte overlap, whose
+// length is held in the two low bits of the last remaining byte, plus one.
+// The result never exceeds record->size.
+size_t mobi_get_record_extrasize(MOBIPdbRecord *record, uint16_t flags) {
+    if (record == NULL || record->data == NULL) {
+        return 0;
+    }
+    const size_t size = record->size;
+    const int mb_flag = flags & 1;
+    size_t num = 0;
+    flags >>= 1;
+    while (flags) {
+        if (flags & 1) {
+            if (num >= size) {
+                // declared entries already cover the whole record
+                return size;
+            }
+            num += sizeof_trailing_entry(record, size - num);
+        }
+        flags >>= 1;
+    }
+    if (mb_flag && num < size) {
+        num += (*(record->data + size - num - 1) & 0x3) + 1;
+    }
+    return num > size ? size : num;
+}
+
+/*size_t mobi_get_record_extrasize(MOBIPdbRecord *record, uint16_t flags) {
+    size_t extra_size = 0, offset = 1;
+    uint8_t b;
+    for (int bit = 15; bit > 0; bit--) {
+        if (flags & (1 << bit)) {
+            // bit is set
+            int bit_count = 0;
+            do {
+                // read at most 4 * 7-bit ints, bit 7 set stops search
+                b = *(record->data + record->size - offset);
+                extra_size |= (b & 0x7f) << bit_count;
+                bit_count += 7;
+                offset++;
+            } while (!(b & 0x80) && (bit_count < 28) && offset < record->size);
+            offset += extra_size - 1;
+        }
+    };
+    // check bit 0
+    if (flags & 1) {
+        if (offset < record->size) {
+            b = *(record->data + record->size - offset);
+            // two first bits hold size
+            extra_size += (b & 0x3) + 1;
+        }
+
+    }
+    return extra_size;
+}*/
+
+// wrapper for mobi_get_rawml and mobi_dump_rawml
+// Decompress all text records of the document.
+// With dump != 0 every decompressed record is written to file; otherwise it
+// is appended to the C string in text, which must hold len characters plus
+// the terminating '\0' (appending stops at the first '\0' inside a record,
+// as strncat would).
+// Returns MOBI_ERROR on missing headers or records, unknown compression,
+// write failure, or when the text exceeds len.
+int mobi_decompress_content(MOBIData *m, char *text, FILE *file, size_t len, int dump) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    size_t offset = 0;
+    // check if we want to parse kf8 part of joint file
+    if (m->use_kf8 && m->next != NULL) {
+        int kf8_offset = mobi_get_kf8boundary(m->next);
+        if (kf8_offset >= 0) {
+            // kf8 boundary + 1 * record0
+            offset = kf8_offset + 1;
+        }
+    }
+    if (m->rh == NULL || m->rh->text_record_count == 0) {
+        printf("Text records not found in MOBI header\n");
+        return MOBI_ERROR;
+    }
+    if (dump ? (file == NULL) : (text == NULL)) {
+        printf("Output not initialized\n");
+        return MOBI_ERROR;
+    }
+    size_t text_rec_count = m->rh->text_record_count;
+    const uint16_t compression_type = m->rh->compression_type;
+    // check for extra data at the end of text files
+    uint16_t extra_flags = 0;
+    if (m->mh && m->mh->extra_flags) {
+        extra_flags = *m->mh->extra_flags;
+    }
+    // get first text record
+    MOBIPdbRecord *curr = mobi_get_record_by_seqnumber(m, 1 + offset);
+
+    MOBIHuffCdic *huffcdic = NULL;
+    if (compression_type == RECORD0_HUFF_COMPRESSION) {
+        // load huff/cdic tables
+        huffcdic = mobi_init_huffcdic(m);
+        if (huffcdic == NULL) {
+            printf("HUFF/CDIC tables not loaded\n");
+            return MOBI_ERROR;
+        }
+    }
+    char decompressed[2*RECORD0_RECORD_SIZE_MAX + 32]; // FIXME debug
+    // append position; tracking it avoids rescanning text for every record
+    char *text_end = dump ? NULL : text + strlen(text);
+    size_t text_length = 0;
+    int ret = MOBI_SUCCESS;
+    while (text_rec_count--) {
+        if (curr == NULL) {
+            // header declares more text records than the file contains
+            printf("Text record not found\n");
+            ret = MOBI_ERROR;
+            break;
+        }
+        size_t extra_size = 0;
+        if (extra_flags) {
+            extra_size = mobi_get_record_extrasize(curr, extra_flags);
+        }
+        if (extra_size > curr->size) {
+            printf("Wrong extra data size in record %i\n", curr->uid);
+            ret = MOBI_ERROR;
+            break;
+        }
+        const size_t record_size = curr->size - extra_size;
+        size_t d_size = 0;
+        switch (compression_type) {
+            case RECORD0_NO_COMPRESSION:
+                // no compression: plain copy of the text part of the record
+                if (record_size > sizeof(decompressed)) {
+                    printf("Text record %i too long\n", curr->uid);
+                    ret = MOBI_ERROR;
+                    break;
+                }
+                memcpy(decompressed, curr->data, record_size);
+                d_size = record_size;
+                break;
+            case RECORD0_PALMDOC_COMPRESSION:
+                // palmdoc lz77 compression
+                d_size = mobi_decompress_lz77(decompressed, curr->data, record_size);
+                break;
+            case RECORD0_HUFF_COMPRESSION:
+                // mobi huffman compression
+                d_size = mobi_decompress_huffman(decompressed, curr->data, record_size, huffcdic, 0);
+                if (d_size > RECORD0_RECORD_SIZE_MAX) {
+                    d_size = RECORD0_RECORD_SIZE_MAX;
+                }
+                break;
+            default:
+                printf("Unknown compression type\n");
+                ret = MOBI_ERROR;
+                break;
+        }
+        if (ret == MOBI_ERROR) {
+            break;
+        }
+        curr = curr->next;
+        text_length += d_size;
+
+        if (dump) {
+            if (fwrite(decompressed, 1, d_size, file) != d_size) {
+                printf("Writing text record failed\n");
+                ret = MOBI_ERROR;
+                break;
+            }
+        } else {
+            if (text_length > len) {
+                printf("Text buffer too small\n");
+                ret = MOBI_ERROR;
+                break;
+            }
+            // same result as strncat(text, decompressed, d_size)
+            size_t n = 0;
+            while (n < d_size && decompressed[n] != '\0') {
+                n++;
+            }
+            memcpy(text_end, decompressed, n);
+            text_end += n;
+            *text_end = '\0';
+        }
+    }
+    // free huff/cdic tables on every path, including errors
+    if (huffcdic != NULL) {
+        mobi_free_huffcdic(huffcdic);
+    }
+    return ret;
+}
+
+// copy raw text to text buffer
+// text must hold len characters plus the terminating '\0'; fails when the
+// text length declared in the record 0 header exceeds len.
+int mobi_get_rawml(MOBIData *m, char *text, size_t len) {
+    if (m == NULL || m->rh == NULL || text == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (m->rh->text_length > len) {
+        printf("Text buffer smaller then text size declared in record0 header\n");
+        return MOBI_ERROR;
+    }
+    text[0] = '\0';
+    return mobi_decompress_content(m, text, NULL, len, 0);
+}
+
+// dump raw text records to open file descriptor
+int mobi_dump_rawml(MOBIData *m, FILE *file) {
+    return mobi_decompress_content(m, NULL, file, 0, 1);
+}
+
+// return kf8 boundary record sequential number or -1 if no such record
+// The EXTH KF8 boundary tag holds, as a big-endian 32-bit value, the number
+// of the record that follows the boundary record; the boundary record itself
+// starts with the 8 bytes "BOUNDARY".
+int mobi_get_kf8boundary(MOBIData *m) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return -1;
+    }
+    MOBIExtHeader *exth_tag = mobi_get_exthtag_by_uid(m, MOBI_EXTH_KF8BOUNDARY);
+    if (exth_tag == NULL || exth_tag->data == NULL || exth_tag->size < 4) {
+        return -1;
+    }
+    // decode byte by byte: no alignment or host byte order assumptions
+    const unsigned char *raw = (const unsigned char *) exth_tag->data;
+    uint32_t rec_number = ((uint32_t) raw[0] << 24) | ((uint32_t) raw[1] << 16) |
+                          ((uint32_t) raw[2] << 8) | (uint32_t) raw[3];
+    if (rec_number == 0) {
+        // would wrap around when stepping back to the boundary record
+        return -1;
+    }
+    rec_number--;
+    MOBIPdbRecord *record = mobi_get_record_by_seqnumber(m, rec_number);
+    // record data is not NUL-terminated, so compare exactly 8 bytes
+    if (record != NULL && record->data != NULL && record->size >= 8 &&
+        memcmp(record->data, "BOUNDARY", 8) == 0) {
+        return (int) rec_number;
+    }
+    return -1;
+}
+
+// Exchange the record 0, MOBI and EXTH header pointers of m and m->next.
+// Returns MOBI_ERROR when m or m->next is missing.
+int mobi_swap_mobidata(MOBIData *m) {
+    if (m == NULL || m->next == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    // three local pointers are enough; no heap allocation that could fail
+    MOBIRecord0Header *rh = m->rh;
+    MOBIMobiHeader *mh = m->mh;
+    MOBIExtHeader *eh = m->eh;
+    m->rh = m->next->rh;
+    m->mh = m->next->mh;
+    m->eh = m->next->eh;
+    m->next->rh = rh;
+    m->next->mh = mh;
+    m->next->eh = eh;
+    return MOBI_SUCCESS;
+}
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..61b8e8f
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,17 @@
+//
+//  util.h
+//  mobi
+//
+//  
Created by Bartek on 08.04.14.
+//  Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_util_h
+#define mobi_util_h
+
+#include "mobi.h"
+#include "memory.h"
+
+int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num);
+int mobi_swap_mobidata(MOBIData *m);
+#endif
diff --git a/src/write.c b/src/write.c
new file mode 100644
index 0000000..050b708
--- /dev/null
+++ b/src/write.c
@@ -0,0 +1,149 @@
+//
+//  write.c
+//  mobi
+//
+//  Created by Bartek on 25.03.14.
+//  Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "write.h"
+
+// Build a buffer holding a PalmDB header with default values, the title
+// "TITLE", the current time as creation and modification time and a record
+// count of one. Returns NULL when the buffer cannot be allocated.
+MOBIBuffer * serialize_palmdb_header(void) {
+    char title[PALMDB_NAME_SIZE_MAX];
+    strcpy(title, "TITLE");
+    const size_t len = strlen(title);
+
+    const uint32_t curtime = (uint32_t)(time(NULL) + EPOCH_MAC_DIFF);
+    const uint32_t uid = 0xff;
+    const uint32_t rec_count = 1;
+    MOBIBuffer *buf = buffer_init(PALMDB_HEADER_LEN);
+    if (buf == NULL) {
+        return NULL;
+    }
+    buffer_addstring(buf, title);
+    // pad the name field to its fixed width
+    buffer_addzeros(buf, PALMDB_NAME_SIZE_MAX - len);
+    buffer_add16(buf, PALMDB_ATTRIBUTE_DEFAULT);
+    buffer_add16(buf, PALMDB_VERSION_DEFAULT);
+    buffer_add32(buf, curtime); // ctime
+    buffer_add32(buf, curtime); // mtime
+    buffer_add32(buf, 0); // btime
+    buffer_add32(buf, PALMDB_MODNUM_DEFAULT);
+    buffer_add32(buf, PALMDB_APPINFO_DEFAULT);
+    buffer_add32(buf, PALMDB_SORTINFO_DEFAULT);
+    buffer_addstring(buf, PALMDB_TYPE_DEFAULT);
+    buffer_addstring(buf, PALMDB_CREATOR_DEFAULT);
+    buffer_add32(buf, uid);
+    buffer_add32(buf, PALMDB_NEXTREC_DEFAULT);
+    buffer_add16(buf, rec_count);
+    return buf;
+}
+
+// Build a buffer holding a record 0 (PalmDOC) header for an empty,
+// uncompressed, unencrypted text. Returns NULL when the buffer cannot be
+// allocated.
+MOBIBuffer * serialize_record0_header(void) {
+    const uint32_t text_length = 0;
+    const uint16_t record_count = 0;
+    MOBIBuffer *buf = buffer_init(RECORD0_HEADER_LEN);
+    if (buf == NULL) {
+        return NULL;
+    }
+    buffer_add16(buf, RECORD0_NO_COMPRESSION);
+    buffer_add16(buf, 0);
+    buffer_add32(buf, text_length);
+    buffer_add16(buf, record_count);
+    buffer_add16(buf, RECORD0_RECORD_SIZE_MAX);
+    buffer_add16(buf, RECORD0_NO_ENCRYPTION);
+    buffer_add16(buf, 0);
+    return buf;
+}
+
+// Write the used part of buf to file (when file is not NULL), report its
+// length and free buf. A NULL buf is ignored, so the result of a failed
+// serialize_* call can be passed in directly.
+void buffer_output(FILE *file, MOBIBuffer *buf) {
+    if (buf == NULL) {
+        return;
+    }
+    if (file) {
+        if (fwrite(buf->data, 1, buf->offset, file) != buf->offset) {
+            printf("Writing buffer failed\n");
+        }
+        printf("Buffer length %zu bytes\n", buf->offset);
+    }
+    buffer_free(buf);
+}
+
+// Allocate a test pdb record whose data is the string "test" in a
+// RECORD0_RECORD_SIZE_MAX byte buffer. Returns NULL on allocation failure;
+// the caller frees record->data and record.
+// NOTE(review): size is set to the same value as offset, as in the original
+// code -- confirm this is intended.
+MOBIPdbRecord * build_pdbrecord(size_t offset) {
+    MOBIPdbRecord *record = malloc(sizeof(MOBIPdbRecord));
+    if (record == NULL) {
+        return NULL;
+    }
+    record->data = malloc(RECORD0_RECORD_SIZE_MAX);
+    // check before writing into the buffer, not after
+    if (record->data == NULL) {
+        free(record);
+        return NULL;
+    }
+    strncpy(record->data, "test", RECORD0_RECORD_SIZE_MAX);
+    record->offset = offset;
+    record->size = offset;
+    record->attributes = 0;
+    record->uid = 0;
+    record->next = NULL;
+    return record;
+}
+
+// Build the 8-byte record info entry for rec: 32-bit offset followed by the
+// attributes byte (always 0) and uid, written together as one 32-bit value.
+// Returns NULL when rec is NULL or the buffer cannot be allocated.
+MOBIBuffer * serialize_record_info(MOBIPdbRecord *rec) {
+    if (rec == NULL) {
+        return NULL;
+    }
+    MOBIBuffer *buf = buffer_init(8);
+    if (buf == NULL) {
+        return NULL;
+    }
+    buffer_add32(buf, (uint32_t) rec->offset);
+    //skip attributes, always 0;
+    buffer_add32(buf, rec->uid);
+    return buf;
+}
+
+// Build a buffer holding the string data of rec.
+// Returns NULL when rec is NULL or the buffer cannot be allocated.
+MOBIBuffer * serialize_pdbrecord(MOBIPdbRecord *rec) {
+    if (rec == NULL || rec->data == NULL) {
+        return NULL;
+    }
+    MOBIBuffer *buf = buffer_init(RECORD0_RECORD_SIZE_MAX);
+    if (buf) {
+        buffer_addstring(buf, rec->data);
+    }
+    return buf;
+}
+
+// Build a buffer holding the 4 bytes that end the file: 233, 142, 13, 10.
+MOBIBuffer * serialize_file_end(void) {
+    // escapes instead of 233 and 142, which do not fit a signed char
+    char end[] = { '\xe9', '\x8e', '\r', '\n' };
+    MOBIBuffer *buf = buffer_init(4);
+    if (buf) {
+        buffer_addraw(buf, end, 4);
+    }
+    return buf;
+}
+
+
+// Write a minimal test MOBI file: PalmDB header, one record info entry
+// followed by 2 zero bytes, record 0 header, one test record and the end
+// marker.
+void write_mobi(void) {
+    FILE *file;
+    MOBIBuffer *buf;
+    MOBIPdbRecord *rec;
+    file = fopen("/Users/baf/src/mobi_test/test.mobi","wb");
+    if (file == NULL) {
+        printf("Cannot open output file\n");
+        return;
+    }
+    buf = serialize_palmdb_header();
+    printf("Writing palmdb header\n");
+    buffer_output(file, buf);
+    rec = build_pdbrecord(PALMDB_HEADER_LEN + PDB_RECORD_INFO_SIZE + 2);
+    if (rec == NULL) {
+        printf("Memory allocation for pdb record failed\n");
+        fclose(file);
+        return;
+    }
+    buf = serialize_record_info(rec);
+    printf("Writing record info + 2 zeros\n");
+    buffer_output(file, buf);
+    // the 2 zero bytes go through their own buffer: raising maxlen of the
+    // 8-byte record info buffer would not enlarge its storage
+    // (assumes buffer_init allocates exactly the requested length)
+    buf = buffer_init(2);
+    if (buf) {
+        buffer_addzeros(buf, 2);
+    }
+    buffer_output(file, buf);
+    buf = serialize_record0_header();
+    printf("Writing record0 header\n");
+    buffer_output(file, buf);
+    buf = serialize_pdbrecord(rec);
+    // TODO: improve freeing of rec buffer, see buffer_free
+    free(rec->data);
+    free(rec);
+    printf("Writing pdb record\n");
+    buffer_output(file, buf);
+    buf = serialize_file_end();
+    printf("Writing 4 end chars\n");
+    buffer_output(file, buf);
+
+    if (fclose(file) != 0) {
+        printf("Closing output file failed\n");
+    }
+}
diff --git a/src/write.h b/src/write.h
new file mode 100644
index 0000000..5b96c3b
--- /dev/null
+++ b/src/write.h
@@ -0,0 +1,20 @@
+//
+//  write.h
+//  mobi
+//
+//  Created by Bartek on 25.03.14.
+//  Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_write_h
+#define mobi_write_h
+
+#include "mobi.h"
+
+__attribute__((visibility("hidden"))) MOBIBuffer * buffer_init(size_t len);
+__attribute__((visibility("hidden"))) void buffer_add8(MOBIBuffer *p, uint8_t data);
+__attribute__((visibility("hidden"))) void buffer_add16(MOBIBuffer *p, uint16_t data);
+__attribute__((visibility("hidden"))) void buffer_add32(MOBIBuffer *p, uint32_t data);
+__attribute__((visibility("hidden"))) void buffer_addstring(MOBIBuffer *p, char *str);
+__attribute__((visibility("hidden"))) void buffer_free(MOBIBuffer *p);
+#endif
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644
index 0000000..07517c2
--- /dev/null
+++ b/tools/Makefile.am
@@ -0,0 +1,12 @@
+# tools
+
+# what flags you want to pass to the C compiler & linker
+AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2
+AM_LDFLAGS =
+
+# this lists the binaries to produce, the (non-PHONY, binary) targets in
+# the previous manual Makefile
+bin_PROGRAMS = mobitool
+man_MANS = mobitool.1
+mobitool_SOURCES = mobitool.c
+mobitool_LDADD = $(top_builddir)/src/libmobi.la
diff --git a/tools/mobitool.1 b/tools/mobitool.1
new file mode 100644
index 0000000..a8dd9a3
--- /dev/null
+++ b/tools/mobitool.1
@@ -0,0 +1,79 @@
+.\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples.
+.\"See Also: +.\"man mdoc.samples for a complete listing of options +.\"man mdoc for the short list of editing options +.\"/usr/share/misc/mdoc.template +.Dd 26.03.14 \" DATE +.Dt test 1 \" Program name and manual section number +.Os Darwin +.Sh NAME \" Section Header - required - don't modify +.Nm test, +.\" The following lines are read in generating the apropos(man -k) database. Use only key +.\" words here as the database is built based on the words here and in the .ND line. +.Nm Other_name_for_same_program(), +.Nm Yet another name for the same program. +.\" Use .Nm macro to designate other names for the documented program. +.Nd This line parsed for whatis database. +.Sh SYNOPSIS \" Section Header - required - don't modify +.Nm +.Op Fl abcd \" [-abcd] +.Op Fl a Ar path \" [-a path] +.Op Ar file \" [file] +.Op Ar \" [file ...] +.Ar arg0 \" Underlined argument - use .Ar anywhere to underline +arg2 ... \" Arguments +.Sh DESCRIPTION \" Section Header - required - don't modify +Use the .Nm macro to refer to your program throughout the man page like such: +.Nm +Underlining is accomplished with the .Ar macro like this: +.Ar underlined text . 
+.Pp \" Inserts a space +A list of items with descriptions: +.Bl -tag -width -indent \" Begins a tagged list +.It item a \" Each item preceded by .It macro +Description of item a +.It item b +Description of item b +.El \" Ends the list +.Pp +A list of flags and their descriptions: +.Bl -tag -width -indent \" Differs from above in tag removed +.It Fl a \"-a flag as a list item +Description of -a flag +.It Fl b +Description of -b flag +.El \" Ends the list +.Pp +.\" .Sh ENVIRONMENT \" May not be needed +.\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1 +.\" .It Ev ENV_VAR_1 +.\" Description of ENV_VAR_1 +.\" .It Ev ENV_VAR_2 +.\" Description of ENV_VAR_2 +.\" .El +.Sh FILES \" File used or created by the topic of the man page +.Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact +.It Pa /usr/share/file_name +FILE_1 description +.It Pa /Users/joeuser/Library/really_long_file_name +FILE_2 description +.El \" Ends the list +.\" .Sh DIAGNOSTICS \" May not be needed +.\" .Bl -diag +.\" .It Diagnostic Tag +.\" Diagnostic informtion here. +.\" .It Diagnostic Tag +.\" Diagnostic informtion here. +.\" .El +.Sh SEE ALSO +.\" List links in ascending order by section, alphabetically within a section. +.\" Please do not reference files that do not exist without filing a bug report +.Xr a 1 , +.Xr b 1 , +.Xr c 1 , +.Xr a 2 , +.Xr b 2 , +.Xr a 3 , +.Xr b 3 +.\" .Sh BUGS \" Document known, unremedied bugs +.\" .Sh HISTORY \" Document history if command behaves in a unique manner \ No newline at end of file diff --git a/tools/mobitool.c b/tools/mobitool.c new file mode 100644 index 0000000..6ad9e0d --- /dev/null +++ b/tools/mobitool.c @@ -0,0 +1,221 @@ +// +// test.c +// mobi +// +// Created by Bartek on 25.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "../src/mobi.h"
+//#include
+
+// FIXME: testing
+// NOTE(review): the trailing ';' is part of the macro expansion -- confirm
+// against the places where DUMP_REC_OPT is used before removing it
+#define DUMP_REC_OPT 0;
+#define LOADFILENAME 1
+
+// Print the metadata of a loaded document to stdout: full name, PalmDB
+// header, record 0 header and every MOBI header field present in the file.
+// Nothing is printed when m or its PalmDB header is missing.
+void print_meta(MOBIData *m) {
+    if (m == NULL || m->ph == NULL) {
+        return;
+    }
+    if (m->mh && m->mh->full_name_offset && m->mh->full_name_length) {
+        size_t len = *m->mh->full_name_length;
+        // one extra zeroed byte: mobi_get_fullname writes at most len bytes
+        // and does not terminate a name that fills all of them
+        char *full_name = calloc(len + 1, 1);
+        if (full_name != NULL) {
+            mobi_get_fullname(m, full_name, len);
+            printf("full name: %s\n", full_name);
+            free(full_name);
+        }
+    }
+    printf("name: %s\n", m->ph->name);
+    printf("attributes: %hu\n", m->ph->attributes);
+    printf("version: %hu\n", m->ph->version);
+    printf("ctime: %u\n", m->ph->ctime);
+    printf("mtime: %u\n", m->ph->mtime);
+    printf("btime: %u\n", m->ph->btime);
+    printf("mod_num: %u\n", m->ph->mod_num);
+    printf("appinfo_offset: %u\n", m->ph->appinfo_offset);
+    printf("sortinfo_offset: %u\n", m->ph->sortinfo_offset);
+    printf("type: %s\n", m->ph->type);
+    printf("creator: %s\n", m->ph->creator);
+    printf("uid: %u\n", m->ph->uid);
+    printf("next_rec: %u\n", m->ph->next_rec);
+    printf("rec_count: %u\n", m->ph->rec_count);
+    if (m->rh) {
+        printf("\nRecord 0:\n");
+        printf("compresion type: %u\n", m->rh->compression_type);
+        printf("text length: %u\n", m->rh->text_length);
+        printf("record count: %u\n", m->rh->text_record_count);
+        printf("record size: %u\n", m->rh->text_record_size);
+        printf("encryption type: %u\n", m->rh->encryption_type);
+        printf("unknown: %u\n", m->rh->unknown1);
+    }
+    if (m->mh) {
+        // each field is a pointer that is NULL when the file's header was
+        // too short to contain it
+        printf("identifier: %s\n", m->mh->mobi_magic);
+        if(m->mh->header_length) { printf("header length: %u\n", *m->mh->header_length); }
+        if(m->mh->mobi_type) { printf("mobi type: %u\n", *m->mh->mobi_type); }
+        if(m->mh->text_encoding) { printf("text encoding: %u\n", *m->mh->text_encoding); }
+        if(m->mh->uid) { printf("unique id: %u\n", *m->mh->uid); }
+        if(m->mh->file_version) { printf("file version: %u\n", *m->mh->file_version); }
+        if(m->mh->orth_index) { printf("orth index: %u\n", *m->mh->orth_index); }
+        if(m->mh->infl_index) { printf("infl index: %u\n", *m->mh->infl_index); }
+        if(m->mh->names_index) { printf("names index: %u\n", *m->mh->names_index); }
+        if(m->mh->keys_index) { printf("keys index: %u\n", *m->mh->keys_index); }
+        if(m->mh->extra0_index) { printf("extra0 index: %u\n", *m->mh->extra0_index); }
+        if(m->mh->extra1_index) { printf("extra1 index: %u\n", *m->mh->extra1_index); }
+        if(m->mh->extra2_index) { printf("extra2 index: %u\n", *m->mh->extra2_index); }
+        if(m->mh->extra3_index) { printf("extra3 index: %u\n", *m->mh->extra3_index); }
+        if(m->mh->extra4_index) { printf("extra4 index: %u\n", *m->mh->extra4_index); }
+        if(m->mh->extra5_index) { printf("extra5 index: %u\n", *m->mh->extra5_index); }
+        if(m->mh->non_text_index) { printf("non text index: %u\n", *m->mh->non_text_index); }
+        if(m->mh->full_name_offset) { printf("full name offset: %u\n", *m->mh->full_name_offset); }
+        if(m->mh->full_name_length) { printf("full name length: %u\n", *m->mh->full_name_length); }
+        if(m->mh->locale) { printf("locale: %u\n", *m->mh->locale); }
+        if(m->mh->input_lang) { printf("input lang: %u\n", *m->mh->input_lang); }
+        if(m->mh->output_lang) { printf("outpu lang: %u\n", *m->mh->output_lang); }
+        if(m->mh->min_version) { printf("minimal version: %u\n", *m->mh->min_version); }
+        if(m->mh->image_index) { printf("first image index: %u\n", *m->mh->image_index); }
+        if(m->mh->huff_rec_index) { printf("huffman record offset: %u\n", *m->mh->huff_rec_index); }
+        if(m->mh->huff_rec_count) { printf("huffman record count: %u\n", *m->mh->huff_rec_count); }
+        if(m->mh->huff_table_offset) { printf("huffman table offset: %u\n", *m->mh->huff_table_offset); }
+        if(m->mh->huff_table_length) { printf("huffman table length: %u\n", *m->mh->huff_table_length); }
+        if(m->mh->exth_flags) { printf("EXTH flags: %u\n", *m->mh->exth_flags); }
+        if(m->mh->unknown6) { printf("unknown: %u\n", *m->mh->unknown6); }
+        if(m->mh->drm_offset) { printf("drm offset: %u\n", *m->mh->drm_offset); }
+        if(m->mh->drm_count) { printf("drm count: %u\n", *m->mh->drm_count); }
+        if(m->mh->drm_size) { printf("drm size: %u\n", *m->mh->drm_size); }
+        if(m->mh->drm_flags) { printf("drm flags: %u\n", *m->mh->drm_flags); }
+        if(m->mh->first_text_index) { printf("first text index: %u\n", *m->mh->first_text_index); }
+        if(m->mh->last_text_index) { printf("last text index: %u\n", *m->mh->last_text_index); }
+        if(m->mh->unknown9) { printf("unknown: %u\n", *m->mh->unknown9); }
+        if(m->mh->fcis_index) { printf("FCIS index: %u\n", *m->mh->fcis_index); }
+        if(m->mh->fcis_count) { printf("FCIS count: %u\n", *m->mh->fcis_count); }
+        if(m->mh->flis_index) { printf("FLIS index: %u\n", *m->mh->flis_index); }
+        if(m->mh->flis_count) { printf("FLIS count: %u\n", *m->mh->flis_count); }
+        if(m->mh->unknown10) { printf("unknown: %u\n", *m->mh->unknown10); }
+        if(m->mh->unknown11) { printf("unknown: %u\n", *m->mh->unknown11); }
+        if(m->mh->srcs_index) { printf("SRCS index: %u\n", *m->mh->srcs_index); }
+        if(m->mh->srcs_count) { printf("SRCS count: %u\n", *m->mh->srcs_count); }
+        if(m->mh->unknown12) { printf("unknown: %u\n", *m->mh->unknown12); }
+        if(m->mh->unknown13) { printf("unknown: %u\n", *m->mh->unknown13); }
+        if(m->mh->extra_flags) { printf("extra record flags: %u\n", *m->mh->extra_flags); }
+        if(m->mh->ncx_index) { printf("NCX offset: %u\n", *m->mh->ncx_index); }
+        if(m->mh->unknown14) { printf("unknown: %u\n", *m->mh->unknown14); }
+        if(m->mh->unknown15) { printf("unknown: %u\n", *m->mh->unknown15); }
+        if(m->mh->datp_index) { printf("DATP index: %u\n", *m->mh->datp_index); }
+        if(m->mh->unknown16) { printf("unknown: %u\n", *m->mh->unknown16); }
+        if(m->mh->unknown17) { printf("unknown: %u\n", *m->mh->unknown17); }
+        if(m->mh->unknown18) { printf("unknown: %u\n", *m->mh->unknown18); }
+        if(m->mh->unknown19) { printf("unknown: %u\n", *m->mh->unknown19); }
+        if(m->mh->unknown20) { printf("unknown: %u\n", *m->mh->unknown20); }
+    }
+}
+
+void print_exth(MOBIData *m) {
+    MOBIExtHeader *curr;
+    if (m->eh == NULL) {
+        return;
+    }
+    curr = m->eh;
+    while (curr != NULL) {
+        char
*str; + uint32_t val; + str = calloc(1, curr->size+1); + strncpy(str, curr->data, curr->size); + val = *(uint32_t*) curr->data; + if (is_littleendian()) { + val = endian_swap32(val); + } + printf("id: %i\tval: %s (%u)\tsize: %zu\n", curr->uid, str, val, curr->size); + free(str); + curr = curr->next; + } +} + +void print_records_meta(MOBIData *m) { + MOBIPdbRecord *currec; + currec = m->rec; + while (currec != NULL) { + printf("offset: %zu\n", currec->offset); + printf("size: %zu\n", currec->size); + printf("attributes: %hhu\n", currec->attributes); + printf("uid: %u\n", currec->uid); + printf("\n"); + currec = currec->next; + } +} + +void dump_records(MOBIData *m, char *filepath) { + MOBIPdbRecord *currec; + FILE *file; + char name[FILENAME_MAX]; + int i = 0; + currec = m->rec; + while (currec != NULL) { + sprintf(name, "%spart_%i_uid_%i", filepath, i++, currec->uid); + file = fopen(name, "wb"); + fwrite(currec->data, 1, currec->size, file); + fclose(file); + currec = currec->next; + } +} + +int dump_rawml(MOBIData *m, char *filepath) { + FILE *file; + int ret; + char name[FILENAME_MAX]; + sprintf(name, "%srawml", filepath); + file = fopen(name, "wb"); + ret = mobi_dump_rawml(m, file); + fclose(file); + return ret; +} + +int loadfilename(const char *filename) { + MOBIData *m; + int ret = 0; + m = mobi_init(); + if (m == NULL) { + printf("init failed\n"); + return 1; + } + char filepath[FILENAME_MAX]; + char *p = strrchr(filename, '/'); + if (p) { + p += 1; + strncpy(filepath, filename, (p - filename)); + filepath[p - filename] = '\0'; + } + else { + filepath[0] = '\0'; + } + m->use_kf8 = MOBI_USE_KF7; + ret = mobi_load_filename(m, filename); + print_meta(m); + if (ret == MOBI_ERROR) { + mobi_free(m); + return 1; + } + print_exth(m); + print_records_meta(m); + int dump_rec_opt = DUMP_REC_OPT; + if (dump_rec_opt) { + dump_records(m, filepath); + } + ret = dump_rawml(m, filepath); + mobi_free(m); + return ret; +} + +int main(int argc, char *argv[]) { + if (argc != 2) 
{ + printf("usage: %s filename\n", argv[0]); + return 1; + } + int command = LOADFILENAME; + int ret = 0; + char filename[FILENAME_MAX]; + strncpy(filename, argv[1], FILENAME_MAX - 1); + switch (command) { + case LOADFILENAME: + ret = loadfilename(filename); + break; + } + return ret; +}