diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..e69de29 diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e69de29 diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..c6e73f5 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,8 @@ +# project Makefile.am + +SUBDIRS = src tools +ACLOCAL_AMFLAGS = -I m4 +# what flags you want to pass to the C compiler & linker +AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2 +AM_LDFLAGS = + diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..e69de29 diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..b483139 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,2 @@ +#!/bin/sh +autoreconf --force --install -I m4 diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..61d395d --- /dev/null +++ b/configure.ac @@ -0,0 +1,44 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([1.10]) +AC_INIT([libmobi], [0.1]) +AC_CONFIG_SRCDIR([src/buffer.c]) + +# Enable automake +AM_INIT_AUTOMAKE([-Wall -Werror foreign]) +# all defined C macros (HAVE_*) will be saved to this file +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_MACRO_DIR([m4]) + +# Checks for programs. +AC_PROG_CC +AC_PROG_INSTALL +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) + +# Init libtool +LT_INIT + +# Checks for libraries. + +# Checks for header files. +AC_CHECK_HEADERS([stdlib.h string.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_TYPE_INT32_T +AC_TYPE_INT8_T +AC_TYPE_SIZE_T +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_TYPE_UINT8_T + +# Checks for library functions. 
+AC_FUNC_MALLOC +AC_FUNC_REALLOC +AC_CHECK_FUNCS([memset strrchr]) + +AC_CONFIG_FILES([Makefile]) +AC_CONFIG_FILES([src/Makefile]) +AC_CONFIG_FILES([tools/Makefile]) +AC_OUTPUT diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..aebc615 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,11 @@ +# libmobi + +# what flags you want to pass to the C compiler & linker +AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2 +AM_LDFLAGS = + +# this lists the binaries to produce, the (non-PHONY, binary) targets in +# the previous manual Makefile +lib_LTLIBRARIES = libmobi.la +libmobi_la_SOURCES = buffer.c compression.c debug.c memory.c read.c util.c write.c \ + buffer.h compression.h debug.h memory.h mobi.h read.h util.h write.h diff --git a/src/buffer.c b/src/buffer.c new file mode 100644 index 0000000..6d86244 --- /dev/null +++ b/src/buffer.c @@ -0,0 +1,191 @@ +// +// buffer.c +// mobi +// +// Created by Bartek on 27.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. +// + +#include +#include "buffer.h" + +#define MAX_BUFFER_SIZE 4096 + +MOBIBuffer * buffer_init(size_t len) { + MOBIBuffer *p = NULL; + p = malloc(sizeof(MOBIBuffer)); + if (p == NULL) { + printf("Buffer allocation failed\n"); + return NULL; + } + p->data = malloc(len); + if (p->data == NULL) { + free(p); + printf("Buffer data allocation failed\n"); + return NULL; + } + p->offset = 0; + p->maxlen = len; + return p; +} + + +void buffer_add8(MOBIBuffer *p, uint8_t data) { + if (p->offset + 1 > p->maxlen) { + printf("Buffer full\n"); + return; + } + p->data[p->offset++] = data; +} + +void buffer_add16(MOBIBuffer *p, uint16_t data) { + if (p->offset + 2 > p->maxlen) { + printf("Buffer full\n"); + return; + } + p->data[p->offset++] = (data & 0xff00) >> 8; + p->data[p->offset++] = (data & 0xff); +} + +void buffer_add32(MOBIBuffer *p, uint32_t data) { + if (p->offset + 4 > p->maxlen) { + printf("Buffer full\n"); + return; + } + p->data[p->offset++] = (data & 0xff000000) >> 16; + 
p->data[p->offset++] = (data & 0xff0000) >> 12; + p->data[p->offset++] = (data & 0xff00) >> 8; + p->data[p->offset++] = (data & 0xff); +} + +void buffer_addraw(MOBIBuffer *p, char* buf, size_t len) { + if (p->offset + len > p->maxlen) { + printf("Buffer full\n"); + return; + } + memcpy(p->data + p->offset, buf, len); + p->offset += len; +} + +void buffer_addstring(MOBIBuffer *p, char *str) { + size_t len; + len = strlen(str); + buffer_addraw(p, str, len); +} + +void buffer_addzeros(MOBIBuffer *p, size_t count) { + if (p->offset + count > p->maxlen) { + printf("Buffer full\n"); + return; + } + memset(p->data + p->offset, 0, count); + p->offset += count; +} + +uint8_t buffer_get8(MOBIBuffer *p) { + if (p->offset + 1 > p->maxlen) { + printf("End of buffer\n"); + return 0; + } + return (uint8_t) p->data[p->offset++]; +} + +uint16_t buffer_get16(MOBIBuffer *p) { + if (p->offset + 2 > p->maxlen) { + printf("End of buffer\n"); + return 0; + } + uint16_t val; + val = (uint8_t) p->data[p->offset] << 8 | (uint8_t) p->data[p->offset + 1]; + p->offset += 2; + return val; +} + +uint32_t buffer_get32(MOBIBuffer *p) { + if (p->offset + 4 > p->maxlen) { + printf("End of buffer\n"); + return 0; + } + uint32_t val; + val = (uint8_t) p->data[p->offset] << 24 | (uint8_t) p->data[p->offset + 1] << 16 | (uint8_t) p->data[p->offset + 2] << 8 | (uint8_t) p->data[p->offset + 3]; + p->offset += 4; + return val; +} + +void buffer_getstring(char *str, MOBIBuffer *p, size_t len) { + if (p->offset + len > p->maxlen) { + printf("End of buffer\n"); + return; + } + strncpy(str, p->data + p->offset, len); + p->offset += len; +} + +void buffer_getraw(void *ptr, MOBIBuffer *p, size_t len) { + if (p->offset + len > p->maxlen) { + printf("End of buffer\n"); + return; + } + memcpy(ptr, p->data + p->offset, len); + p->offset += len; +} + +void buffer_copy8(uint8_t **val, MOBIBuffer *p) { + *val = NULL; + if (p->offset + 1 > p->maxlen) { + return; + } + *val = malloc(sizeof(uint8_t)); + if (*val == NULL) 
{ + return; + } + **val = (uint8_t) p->data[p->offset++]; +} + +void buffer_copy16(uint16_t **val, MOBIBuffer *p) { + *val = NULL; + if (p->offset + 2 > p->maxlen) { + return; + } + *val = malloc(sizeof(uint16_t)); + if (*val == NULL) { + return; + } + **val = (uint8_t) p->data[p->offset] << 8 | (uint8_t) p->data[p->offset + 1]; + p->offset += 2; +} + +void buffer_copy32(uint32_t **val, MOBIBuffer *p) { + *val = NULL; + if (p->offset + 4 > p->maxlen) { + return; + } + *val = malloc(sizeof(uint32_t)); + if (*val == NULL) { + return; + } + **val = (uint8_t) p->data[p->offset] << 24 | (uint8_t) p->data[p->offset + 1] << 16 | (uint8_t) p->data[p->offset + 2] << 8 | (uint8_t) p->data[p->offset + 3]; + p->offset += 4; +} + +int is_littleendian() { + volatile uint32_t i = 1; + return (*((uint8_t*)(&i))) == 1; +} + +uint32_t endian_swap32(uint32_t x) { + return + (x & 0xff) << 24 | + (x & 0xff00) << 8 | + (x & 0xff0000) >> 8 | + (x & 0xff000000) >> 24; +} + +void buffer_free(MOBIBuffer *p) { + if (p == NULL) return; + + if (p->data != NULL) { + free(p->data); + } + free(p); +} diff --git a/src/buffer.h b/src/buffer.h new file mode 100644 index 0000000..88b78cf --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,42 @@ +// +// buffer.h +// mobi +// +// Created by Bartek on 27.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+// + +#ifndef mobi_buffer_h +#define mobi_buffer_h + +#include +#include + +#include "mobi.h" + +typedef struct { + char *data; + size_t offset; + size_t maxlen; +} MOBIBuffer; + +MOBIBuffer * buffer_init(size_t len); +void buffer_add8(MOBIBuffer *p, uint8_t data); +void buffer_add16(MOBIBuffer *p, uint16_t data); +void buffer_add32(MOBIBuffer *p, uint32_t data); +void buffer_addraw(MOBIBuffer *p, char* buf, size_t len); +void buffer_addstring(MOBIBuffer *p, char *str); +void buffer_addzeros(MOBIBuffer *p, size_t count); +uint8_t buffer_get8(MOBIBuffer *p); +uint16_t buffer_get16(MOBIBuffer *p); +uint32_t buffer_get32(MOBIBuffer *p); +void buffer_copy8(uint8_t **val, MOBIBuffer *p); +void buffer_copy16(uint16_t **val, MOBIBuffer *p); +void buffer_copy32(uint32_t **val, MOBIBuffer *p); +void buffer_getstring(char *str, MOBIBuffer *p, size_t len); +void buffer_getraw(void *ptr, MOBIBuffer *p, size_t len); +void buffer_free(MOBIBuffer *p); +int is_littleendian(); +uint32_t endian_swap32(uint32_t x); + +#endif diff --git a/src/compression.c b/src/compression.c new file mode 100644 index 0000000..a50f6e2 --- /dev/null +++ b/src/compression.c @@ -0,0 +1,139 @@ +// +// compression.c +// mobi +// +// Created by Bartek on 27.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
// PalmDOC version of LZ77 compression
// Decompressor based on this algorithm:
// http://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#PalmDoc
//
// out - destination; the caller must provide enough room for the
//       decompressed data (the size is not known to this function)
// in  - compressed input
// len - number of input bytes
//
// Returns the number of bytes written to out. Decoding stops early, and the
// bytes produced so far are reported, when the input ends in the middle of
// a two-byte pair or when a back-reference points before the start of out.
size_t mobi_decompress_lz77(char *out, const char *in, size_t len) {
    // work on unsigned bytes so that values >= 0x80 are never negative
    const unsigned char *src = (const unsigned char *) in;
    const unsigned char *const src_end = src + len;
    char *const out_start = out;
    while (src < src_end) {
        const unsigned char val = *src;
        if (val >= 0xc0) {
            // byte pair: space + char
            *(out++) = ' ';
            *(out++) = (char) (val ^ 0x80);
            src++;
        } else if (val >= 0x80) {
            // length, distance pair
            // 0x8000 + (distance << 3) + ((length-3) & 0x07)
            if (src_end - src < 2) {
                // second byte of the pair is missing
                break;
            }
            const unsigned int pair = ((unsigned int) val << 8) | src[1];
            const size_t distance = (pair >> 3) & 0x7ff;
            size_t length = (pair & 0x7) + 3;
            if (distance > (size_t) (out - out_start)) {
                // would read before the first decompressed byte
                break;
            }
            // byte-by-byte on purpose: source and destination may overlap
            while (length-- > 0) {
                *out = *(out - distance);
                out++;
            }
            src += 2;
        } else if (val >= 0x09) {
            // single char, not modified
            *(out++) = (char) *(src++);
        } else if (val >= 0x01) {
            // n chars not modified; never read past the end of the input
            src++;
            size_t count = val;
            if ((size_t) (src_end - src) < count) {
                count = (size_t) (src_end - src);
            }
            memcpy(out, src, count);
            out += count;
            src += count;
        } else {
            // char '\0', not modified
            *(out++) = (char) *(src++);
        }
    }
    return (size_t) (out - out_start);
}

// Pack up to eight input bytes into a 64-bit value, first byte in the most
// significant position. When fewer than eight bytes are available the
// remaining low-order bytes are zero.
//
// in  - input bytes
// len - number of bytes available at in (values above 8 are treated as 8)
uint64_t _fill_buffer(const char *in, size_t len) {
    uint64_t value = 0;
    if (len > 8) {
        len = 8;
    }
    for (size_t i = 0; i < len; i++) {
        // byte i lands at bits (7 - i) * 8; the shift count stays in 0..56
        value |= (uint64_t) (unsigned char) in[i] << ((7 - i) * 8);
    }
    return value;
}
+= symbol_length; + } else { + // symbol is compressed + // TODO cache uncompressed symbols? + out += mobi_decompress_huffman(out, (huffcdic->symbols[i] + offset + 2), (symbol_length), huffcdic, depth + 1); + } + } + return (size_t) out - start_out; + +} diff --git a/src/compression.h b/src/compression.h new file mode 100644 index 0000000..3d1678c --- /dev/null +++ b/src/compression.h @@ -0,0 +1,29 @@ +// +// compression.h +// mobi +// +// Created by Bartek on 27.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. +// + +#ifndef mobi_lz77_h +#define mobi_lz77_h + +#include +#include + +typedef struct { + size_t index_count; + size_t index_read; + size_t code_length; + uint32_t table1[256]; + uint32_t mincode_table[33]; + uint32_t maxcode_table[33]; + uint16_t *symbol_offsets; + char **symbols; +} MOBIHuffCdic; + +size_t mobi_decompress_lz77(char *out, const char *in, size_t len); +size_t mobi_decompress_huffman(char *out, const char *in, size_t len, MOBIHuffCdic *huffcdic, size_t depth); + +#endif diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 0000000..1767df1 --- /dev/null +++ b/src/debug.c @@ -0,0 +1,38 @@ +// +// debug.c +// mobi +// +// Created by Bartek on 02.04.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
#if MOBI_DEBUG
// Tracing wrappers around the standard allocator. debug.h redefines
// free/malloc/realloc/calloc as macros that expand to these functions, so
// the real library functions are called here with the name in parentheses,
// which prevents the function-like macros from expanding again.
// Every wrapper prints the call site (file:line) and the call to stdout.

// Log and release ptr.
void debug_free(void *ptr, char *file, int line){
    printf("%s:%d: free(%p)\n",file, line, ptr);
    (free)(ptr);
}

// Allocate size bytes, log the request and the returned pointer.
void *debug_malloc(size_t size, char *file, int line) {
    void *ptr = (malloc)(size);
    // %zu prints the full size_t value; a cast to int would truncate it
    printf("%s:%d: malloc(%zu)=%p\n", file, line, size, ptr);
    return ptr;
}

// Resize ptr to size bytes. The old pointer is printed before the call
// because it may no longer be valid afterwards.
void *debug_realloc(void *ptr, size_t size, char *file, int line) {
    printf("%s:%d: realloc(%p", file, line, ptr);
    void *rptr = (realloc)(ptr, size);
    printf(", %zu)=%p\n", size, rptr);
    return rptr;
}

// Allocate a zeroed array of num elements of size bytes each and log it.
void *debug_calloc(size_t num, size_t size, char *file, int line) {
    void *ptr = (calloc)(num, size);
    printf("%s:%d: calloc(%zu, %zu)=%p\n", file, line, num, size, ptr);
    return ptr;
}
#endif
+// + +#include +#include "memory.h" + +MOBIData * mobi_init() { + MOBIData *m = NULL; + m = calloc(1, sizeof(MOBIData)); + if (m == NULL) return NULL; + m->use_kf8 = MOBI_USE_KF8; + m->ph = NULL; + m->rh = NULL; + m->mh = NULL; + m->eh = NULL; + m->rec = NULL; + m->next = NULL; + return m; +} + +void mobi_free_mh(MOBIData *m) { + if (m->mh == NULL) { + return; + } + free(m->mh->header_length); + free(m->mh->mobi_type); + free(m->mh->text_encoding); + free(m->mh->uid); + free(m->mh->file_version); + free(m->mh->orth_index); + free(m->mh->infl_index); + free(m->mh->names_index); + free(m->mh->keys_index); + free(m->mh->extra0_index); + free(m->mh->extra1_index); + free(m->mh->extra2_index); + free(m->mh->extra3_index); + free(m->mh->extra4_index); + free(m->mh->extra5_index); + free(m->mh->non_text_index); + free(m->mh->full_name_offset); + free(m->mh->full_name_length); + free(m->mh->locale); + free(m->mh->input_lang); + free(m->mh->output_lang); + free(m->mh->min_version); + free(m->mh->image_index); + free(m->mh->huff_rec_index); + free(m->mh->huff_rec_count); + free(m->mh->huff_table_offset); + free(m->mh->huff_table_length); + free(m->mh->exth_flags); + free(m->mh->unknown6); + free(m->mh->drm_offset); + free(m->mh->drm_count); + free(m->mh->drm_size); + free(m->mh->drm_flags); + free(m->mh->first_text_index); + free(m->mh->last_text_index); + free(m->mh->unknown9); + free(m->mh->fcis_index); + free(m->mh->fcis_count); + free(m->mh->flis_index); + free(m->mh->flis_count); + free(m->mh->unknown10); + free(m->mh->unknown11); + free(m->mh->srcs_index); + free(m->mh->srcs_count); + free(m->mh->unknown12); + free(m->mh->unknown13); + free(m->mh->extra_flags); + free(m->mh->ncx_index); + free(m->mh->unknown14); + free(m->mh->unknown15); + free(m->mh->datp_index); + free(m->mh->unknown16); + free(m->mh->unknown17); + free(m->mh->unknown18); + free(m->mh->unknown19); + free(m->mh->unknown20); + free(m->mh); + m->mh = NULL; +} + +void mobi_free_rec(MOBIData *m) { + 
MOBIPdbRecord *curr, *tmp; + curr = m->rec; + while (curr != NULL) { + tmp = curr; + curr = curr->next; + free(tmp->data); + free(tmp); + tmp = NULL; + } + m->rec = NULL; +} + +void mobi_free_eh(MOBIData *m) { + MOBIExtHeader *curr, *tmp; + curr = m->eh; + while (curr != NULL) { + tmp = curr; + curr = curr->next; + free(tmp->data); + free(tmp); + tmp = NULL; + } + m->eh = NULL; +} + +void mobi_free(MOBIData *m) { + if (m == NULL) { + return; + } + mobi_free_mh(m); + mobi_free_eh(m); + mobi_free_rec(m); + free(m->ph); + free(m->rh); + if (m->next) { + mobi_free_mh(m->next); + mobi_free_eh(m->next); + free(m->next->rh); + free(m->next); + m->next = NULL; + } + free(m); + m = NULL; +} + +MOBIHuffCdic * mobi_init_huffcdic(MOBIData *m) { + MOBIHuffCdic *huffcdic; + int ret; + huffcdic = calloc(1, sizeof(MOBIHuffCdic)); + if (huffcdic == NULL) { + printf("Memory allocation for huffcdic structure failed\n"); + return NULL; + } + ret = mobi_parse_huffdic(m, huffcdic); + if (ret == MOBI_ERROR) { + free(huffcdic); + return NULL; + } + return huffcdic; +} + +void mobi_free_huffcdic(MOBIHuffCdic *huffcdic) { + free(huffcdic->symbol_offsets); + free(huffcdic->symbols); + free(huffcdic); +} diff --git a/src/memory.h b/src/memory.h new file mode 100644 index 0000000..6477bab --- /dev/null +++ b/src/memory.h @@ -0,0 +1,23 @@ +// +// memory.h +// mobi +// +// Created by Bartek on 31.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. +// + +#ifndef mobi_memory_h +#define mobi_memory_h + +#include "mobi.h" + +MOBIData * mobi_init(); +void mobi_free_mh(MOBIData *m); +void mobi_free_rec(MOBIData *m); +void mobi_free_eh(MOBIData *m); +void mobi_free(MOBIData *m); + +MOBIHuffCdic * mobi_init_huffcdic(MOBIData *m); +void mobi_free_huffcdic(MOBIHuffCdic *huffcdic); + +#endif diff --git a/src/mobi.h b/src/mobi.h new file mode 100644 index 0000000..3cd7bdc --- /dev/null +++ b/src/mobi.h @@ -0,0 +1,253 @@ +// +// mobi.h +// libmobi +// +// Created by Bartek on 24.03.14. 
+// Copyright (c) 2014 Bartek. All rights reserved. +// + +#ifndef libmobi_mobi_h +#define libmobi_mobi_h + +#include +#include "buffer.h" +#include "compression.h" +#include "debug.h" + +#define MOBI_ERROR -1 +#define MOBI_SUCCESS 0 + +#define MOBI_USE_KF8 1 +#define MOBI_USE_KF7 0 + +#define EPOCH_MAC_DIFF 2082844800 +#define PALMDB_HEADER_LEN 78 +#define PALMDB_NAME_SIZE_MAX 32 +#define PALMDB_ATTRIBUTE_DEFAULT 0 +#define PALMDB_VERSION_DEFAULT 0 +#define PALMDB_MODNUM_DEFAULT 0 +#define PALMDB_APPINFO_DEFAULT 0 +#define PALMDB_SORTINFO_DEFAULT 0 +#define PALMDB_TYPE_DEFAULT "BOOK" +#define PALMDB_CREATOR_DEFAULT "MOBI" +#define PALMDB_NEXTREC_DEFAULT 0 + +#define RECORD0_HEADER_LEN 16 +#define RECORD0_NO_COMPRESSION 1 +#define RECORD0_PALMDOC_COMPRESSION 2 +#define RECORD0_HUFF_COMPRESSION 17480 +#define RECORD0_RECORD_SIZE_MAX 4096 +#define RECORD0_NO_ENCRYPTION 0 +#define RECORD0_OLD_ENCRYPTION 1 +#define RECORD0_MOBI_ENCRYPTION 2 + +#define PDB_RECORD_INFO_SIZE 8 + +#define MOBI_MAGIC "MOBI" +#define EXTH_MAGIC "EXTH" +#define HUFF_MAGIC "HUFF" +#define CDIC_MAGIC "CDIC" + +#define CDIC_HEADER_LEN 16 +#define HUFF_HEADER_LEN 24 +#define HUFF_RECORD_MINSIZE 2584 + +// EXTH +#define DRM_SERVER_ID 1 +#define DRM_COMMERCE_ID 2 +#define DRM_EBOOKBASE_BOOK_ID 3 + +#define MOBI_EXTH_AUTHOR 100 // +#define MOBI_EXTH_PUBLISHER 101 // +#define MOBI_EXTH_IMPRINT 102 // +#define MOBI_EXTH_DESCRIPTION 103 // +#define MOBI_EXTH_ISBN 104 // +#define MOBI_EXTH_SUBJECT 105 // +#define MOBI_EXTH_PUBLISHINGDATE 106 // +#define MOBI_EXTH_REVIEW 107 // +#define MOBI_EXTH_CONTRIBUTOR 108 // +#define MOBI_EXTH_RIGHTS 109 // +#define MOBI_EXTH_SUBJECTCODE 110 // +#define MOBI_EXTH_TYPE 111 // +#define MOBI_EXTH_SOURCE 112 // +#define MOBI_EXTH_ASIN 113 +#define MOBI_EXTH_VERSION 114 +#define MOBI_EXTH_SAMPLE 115 +#define MOBI_EXTH_STARTREADING 116 +#define MOBI_EXTH_ADULT 117 // +#define MOBI_EXTH_PRICE 118 // +#define MOBI_EXTH_PRICECURRENCY 119 // +#define MOBI_EXTH_KF8BOUNDARY 
121 +#define MOBI_EXTH_COUNTRESOURCES 125 +#define MOBI_EXTH_KF8OVERURI 129 + +#define MOBI_EXTH_DICTNAME 200 // +#define MOBI_EXTH_COVEROFFSET 201 // +#define MOBI_EXTH_THUMBOFFSET 202 +#define MOBI_EXTH_HASFAKECOVER 203 +#define MOBI_EXTH_CREATORSOFT 204 +#define MOBI_EXTH_CREATORMAJOR 205 +#define MOBI_EXTH_CREATORMINOR 206 +#define MOBI_EXTH_CREATORBUILD 207 +#define MOBI_EXTH_WATERMARK 208 +#define MOBI_EXTH_TAMPERKEYS 209 + +#define MOBI_EXTH_FONTSIGNATURE 300 + +#define MOBI_EXTH_CLIPPINGLIMIT 401 +#define MOBI_EXTH_PUBLISHERLIMIT 402 +#define MOBI_EXTH_TTS 404 +#define MOBI_EXTH_RENAL 405 +#define MOBI_EXTH_RENALEXPIRE 406 + +#define MOBI_EXTH_CDETYPE 501 +#define MOBI_EXTH_LASTUPDATE 502 +#define MOBI_EXTH_UPDATEDTITLE 503 +#define MOBI_EXTH_LANGUAGE 524 // +#define MOBI_EXTH_ALIGNMENT 525 +#define MOBI_EXTH_CREATORBUILD2 535 + + + + + +typedef struct { + char name[PALMDB_NAME_SIZE_MAX + 1]; // zero terminated, trimmed title+author + uint16_t attributes; // PALMDB_ATTRIBUTE_DEFAULT + uint16_t version; // PALMDB_VERSION_DEFAULT + uint32_t ctime; // creation time + uint32_t mtime; // modification time + uint32_t btime; // backup time + uint32_t mod_num; // PALMDB_MODNUM_DEFAULT + uint32_t appinfo_offset; // PALMDB_APPINFO_DEFAULT + uint32_t sortinfo_offset; // PALMDB_SORTINFO_DEFAULT + char type[5]; // PALMDB_TYPE_DEFAULT + char creator[5]; // PALMDB_CREATOR_DEFAULT + uint32_t uid; // used internally to identify record + uint32_t next_rec; // PALMDB_NEXTREC_DEFAULT + uint16_t rec_count; // number of records in the file +} MOBIPdbHeader; + + + +typedef struct pdb_record { + size_t offset; + size_t size; + uint8_t attributes; + uint32_t uid; + char *data; + struct pdb_record *next; +} MOBIPdbRecord; + +typedef struct exth { + int uid; + size_t size; + void *data; + struct exth *next; +} MOBIExtHeader; + +typedef struct { + // PalmDOC header (extended), offset 0, length 16 + uint16_t compression_type; // 0; 1 == no compression, 2 = PalmDOC compression, 17480 = 
HUFF/CDIC compression + //uint16_t unused; // 2; 0 + uint32_t text_length; // 4; uncompressed length of the entire text of the book + uint16_t text_record_count; // 8; number of PDB records used for the text of the book + uint16_t text_record_size; // 10; maximum size of each record containing text, always 4096 + uint16_t encryption_type; // 12; 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption + uint16_t unknown1; // 14; usually 0 +} MOBIRecord0Header; + +typedef struct { + // MOBI header, offset 16 + char mobi_magic[5]; // 16: M O B I { 77, 79, 66, 73 } + uint32_t *header_length; // 20: the length of the MOBI header, including the previous 4 bytes + uint32_t *mobi_type; // 24: mobipocket file type + uint32_t *text_encoding; // 28: 1252 = CP1252, 65001 = UTF-8 + uint32_t *uid; // 32: unique id + uint32_t *file_version; // 36: mobipocket format + uint32_t *orth_index; // 40: section number of orthographic meta index. 0xFFFFFFFF if index is not available. + uint32_t *infl_index; // 44: section number of inflection meta index. 0xFFFFFFFF if index is not available. + uint32_t *names_index; // 48: section number of names meta index. 0xFFFFFFFF if index is not available. + uint32_t *keys_index; // 52: section number of keys meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra0_index; // 56: section number of extra 0 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra1_index; // 60: section number of extra 1 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra2_index; // 64: section number of extra 2 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra3_index; // 68: section number of extra 3 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra4_index; // 72: section number of extra 4 meta index. 0xFFFFFFFF if index is not available. + uint32_t *extra5_index; // 76: section number of extra 5 meta index. 0xFFFFFFFF if index is not available. 
+ uint32_t *non_text_index; // 80: first record number (starting with 0) that's not the book's text + uint32_t *full_name_offset; // 84: offset in record 0 (not from start of file) of the full name of the book + uint32_t *full_name_length; // 88: + uint32_t *locale; // 92: low byte is main language 09= English, next byte is dialect, 08 = British, 04 = US + uint32_t *input_lang; // 96: input language for a dictionary + uint32_t *output_lang; // 100: output language for a dictionary + uint32_t *min_version; // 104: minimum mobipocket version support needed to read this file. + uint32_t *image_index; // 108: first record number (starting with 0) that contains an image (sequential) + uint32_t *huff_rec_index; // 112: first huffman compression record. + uint32_t *huff_rec_count; // 116: + uint32_t *huff_table_offset; // 120: + uint32_t *huff_table_length; // 124: + uint32_t *exth_flags; // 128: bitfield. if bit 6 (0x40) is set, then there's an EXTH record + // 32 unknown bytes 0? + // unknown2 + // unknown3 + // unknown4 + // unknown5 + uint32_t *unknown6; // 164: use 0xFFFFFFFF + uint32_t *drm_offset; // 168: offset to DRM key info in DRMed files. 0xFFFFFFFF if no DRM + uint32_t *drm_count; // 172: number of entries in DRM info + uint32_t *drm_size; // 176: number of bytes in DRM info + uint32_t *drm_flags; // 180: some flags concerning the DRM info + // 8 unknown bytes 0? 
+ // unknown7 + // unknown8 + uint16_t *first_text_index; // 192: + uint16_t *last_text_index; // 194: + uint32_t *unknown9; // 196: + uint32_t *fcis_index; // 200: + uint32_t *fcis_count; // 204: + uint32_t *flis_index; // 208: + uint32_t *flis_count; // 212: + uint32_t *unknown10; // 216: + uint32_t *unknown11; // 220: + uint32_t *srcs_index; // 224: + uint32_t *srcs_count; // 228: + uint32_t *unknown12; // 232: + uint32_t *unknown13; // 236: + // uint16_t fill 0 + uint16_t *extra_flags; // 242: + uint32_t *ncx_index; // 244: + uint32_t *unknown14; // 248: + uint32_t *unknown15; // 252: + uint32_t *datp_index; // 256: + uint32_t *unknown16; // 260: + uint32_t *unknown17; // 264: + uint32_t *unknown18; // 268: + uint32_t *unknown19; // 272: + uint32_t *unknown20; // 276: +} MOBIMobiHeader; + +typedef struct m { + uint8_t use_kf8; + MOBIPdbHeader *ph; + MOBIRecord0Header *rh; + MOBIMobiHeader *mh; + MOBIExtHeader *eh; + MOBIPdbRecord *rec; + struct m *next; +} MOBIData; + +void write_mobi(void); +int mobi_load_file(MOBIData *m, FILE *file); +int mobi_load_filename(MOBIData *m, const char *path); +MOBIData * mobi_init(); +void mobi_free(MOBIData *m); + +int mobi_parse_huffdic(MOBIData *m, MOBIHuffCdic *cdic); +MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid); +MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t uid); +int mobi_get_rawml(MOBIData *m, char *text, size_t len); +int mobi_dump_rawml(MOBIData *m, FILE *file); +void mobi_get_fullname(MOBIData *m, char *fullname, size_t len); +int mobi_get_kf8boundary(MOBIData *m); +#endif diff --git a/src/read.c b/src/read.c new file mode 100644 index 0000000..761cca5 --- /dev/null +++ b/src/read.c @@ -0,0 +1,557 @@ +// +// read.c +// mobi +// +// Created by Bartek on 26.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+// + +#include +#include +#include "read.h" + +int mobi_load_pdbheader(MOBIData *m, FILE *file) { + MOBIBuffer *buf; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + if (!file) { + return MOBI_ERROR; + } + buf = buffer_init(PALMDB_HEADER_LEN); + if (buf == NULL) { + return MOBI_ERROR; + } + size_t len = fread(buf->data, 1, PALMDB_HEADER_LEN, file); + if (len != PALMDB_HEADER_LEN) { + buffer_free(buf); + return MOBI_ERROR; + } + m->ph = calloc(1, sizeof(MOBIPdbHeader)); + if (m->ph == NULL) { + printf("Memory allocation for pdb header failed\n"); + return MOBI_ERROR; + } + // parse header + buffer_getstring(m->ph->name, buf, PALMDB_NAME_SIZE_MAX); + m->ph->name[PALMDB_NAME_SIZE_MAX] = '\0'; + m->ph->attributes = buffer_get16(buf); + m->ph->version = buffer_get16(buf); + m->ph->ctime = buffer_get32(buf); + m->ph->mtime = buffer_get32(buf); + m->ph->btime = buffer_get32(buf); + m->ph->mod_num = buffer_get32(buf); + m->ph->appinfo_offset = buffer_get32(buf); + m->ph->sortinfo_offset = buffer_get32(buf); + buffer_getstring(m->ph->type, buf, 4); + m->ph->type[4] = '\0'; + buffer_getstring(m->ph->creator, buf, 4); + m->ph->creator[4] = '\0'; + m->ph->uid = buffer_get32(buf); + m->ph->next_rec = buffer_get32(buf); + m->ph->rec_count = buffer_get16(buf); + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_load_reclist(MOBIData *m, FILE *file) { + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + if (!file) { + printf("File not ready\n"); + return MOBI_ERROR; + } + int i; + MOBIBuffer *buf; + MOBIPdbRecord *curr; + m->rec = calloc(1, sizeof(MOBIPdbRecord)); + if (m->rec == NULL) { + printf("Memory allocation for pdb record failed\n"); + return MOBI_ERROR; + } + curr = m->rec; + for (i = 0; i < m->ph->rec_count; i++) { + buf = buffer_init(PDB_RECORD_INFO_SIZE); + if (buf == NULL) { + return MOBI_ERROR; + } + size_t len = fread(buf->data, 1, PDB_RECORD_INFO_SIZE, file); + if (len != 
PDB_RECORD_INFO_SIZE) { + buffer_free(buf); + return MOBI_ERROR; + } + if (i > 0) { + curr->next = calloc(1, sizeof(MOBIPdbRecord)); + if (curr->next == NULL) { + printf("Memory allocation for pdb record failed\n"); + return MOBI_ERROR; + } + curr = curr->next; + } + curr->offset = buffer_get32(buf); + curr->attributes = buffer_get8(buf); + uint8_t h = buffer_get8(buf); + uint16_t l = buffer_get16(buf); + curr->uid = h << 16 | l; + curr->next = NULL; + buffer_free(buf); + } + return MOBI_SUCCESS; +} + +int mobi_load_recdata(MOBIData *m, FILE *file) { + MOBIPdbRecord *curr, *next; + int ret; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + curr = m->rec; + while (curr != NULL) { + size_t size; + if (curr->next != NULL) { + next = curr->next; + size = next->offset - curr->offset; + } else { + fseek(file, 0, SEEK_END); + size = ftell(file) - curr->offset; + next = NULL; + } + + curr->size = size; + ret = mobi_load_rec(curr, file); + if (ret == MOBI_ERROR) { + printf("Error loading record uid %i data\n", curr->uid); + mobi_free_rec(m); + return MOBI_ERROR; + } + curr = next; + } + return MOBI_SUCCESS; +} + +int mobi_load_rec(MOBIPdbRecord *rec, FILE *file) { + size_t len; + int ret; + ret = fseek(file, rec->offset, SEEK_SET); + if (ret != 0) { + printf("Record %i not found\n", rec->uid); + return MOBI_ERROR; + } + rec->data = malloc(rec->size); + if (rec->data == NULL) { + printf("Memory allocation for pdb record data failed\n"); + return MOBI_ERROR; + } + len = fread(rec->data, 1, rec->size, file); + if (len < rec->size) { + printf("Truncated data in record %i\n", rec->uid); + rec->size = len; + char *ptr = realloc(rec->data, len); + if (ptr) { + rec->data = ptr; + } + } + return MOBI_SUCCESS; +} + +int mobi_parse_extheader(MOBIData *m, MOBIBuffer *buf) { + size_t saved_maxlen; + char exth_magic[4]; + size_t exth_length; + size_t rec_count; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; 
+ } + buffer_getstring(exth_magic, buf, 4); + exth_length = buffer_get32(buf); + rec_count = buffer_get32(buf); + if (strncmp(exth_magic, EXTH_MAGIC, 4) != 0 || + exth_length + buf->offset + 8 > buf->maxlen || + rec_count == 0) { + return MOBI_ERROR; + } + saved_maxlen = buf->maxlen; + buf->maxlen = exth_length + buf->offset - 8; + m->eh = calloc(1, sizeof(MOBIExtHeader)); + if (m->eh == NULL) { + printf("Memory allocation for EXTH header failed\n"); + return MOBI_ERROR; + } + int i; + MOBIExtHeader *curr; + curr = m->eh; + for (i = 0; i < rec_count; i++) { + if (i > 0) { + curr->next = calloc(1, sizeof(MOBIExtHeader)); + if (curr->next == NULL) { + printf("Memory allocation for EXTH header failed\n"); + return MOBI_ERROR; + } + curr = curr->next; + } + curr->uid = buffer_get32(buf); + // data size = record size minus 8 bytes for uid and size + curr->size = buffer_get32(buf) - 8; + if (curr->size == 0) { + printf("Skip record %i, data too short\n", curr->uid); + continue; + } + curr->data = malloc(curr->size); + if (curr->data == NULL) { + printf("Memory allocation for EXTH record %i failed\n", curr->uid); + mobi_free_eh(m); + return MOBI_ERROR; + } + buffer_getraw(curr->data, buf, curr->size); + curr->next = NULL; + } + buf->maxlen = saved_maxlen; + return MOBI_SUCCESS; +} + +int mobi_parse_mobiheader(MOBIData *m, MOBIBuffer *buf) { + size_t saved_maxlen; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + m->mh = calloc(1, sizeof(MOBIMobiHeader)); + if (m->mh == NULL) { + printf("Memory allocation for MOBI header failed\n"); + return MOBI_ERROR; + } + buffer_getstring(m->mh->mobi_magic, buf, 4); + m->mh->mobi_magic[4] = '\0'; + buffer_copy32(&m->mh->header_length, buf); + if (strcmp(m->mh->mobi_magic, MOBI_MAGIC) != 0 || m->mh->header_length == NULL) { + printf("MOBI header not found\n"); + mobi_free_mh(m); + return MOBI_ERROR; + } + saved_maxlen = buf->maxlen; + // read only declared MOBI header length (curr offset minus 8 
already read bytes) + buf->maxlen = *m->mh->header_length + buf->offset - 8; + buffer_copy32(&m->mh->mobi_type, buf); + buffer_copy32(&m->mh->text_encoding, buf); + buffer_copy32(&m->mh->uid, buf); + buffer_copy32(&m->mh->file_version, buf); + buffer_copy32(&m->mh->orth_index, buf); + buffer_copy32(&m->mh->infl_index, buf); + buffer_copy32(&m->mh->names_index, buf); + buffer_copy32(&m->mh->keys_index, buf); + buffer_copy32(&m->mh->extra0_index, buf); + buffer_copy32(&m->mh->extra1_index, buf); + buffer_copy32(&m->mh->extra2_index, buf); + buffer_copy32(&m->mh->extra3_index, buf); + buffer_copy32(&m->mh->extra4_index, buf); + buffer_copy32(&m->mh->extra5_index, buf); + buffer_copy32(&m->mh->non_text_index, buf); + buffer_copy32(&m->mh->full_name_offset, buf); + buffer_copy32(&m->mh->full_name_length, buf); + buffer_copy32(&m->mh->locale, buf); + buffer_copy32(&m->mh->input_lang, buf); + buffer_copy32(&m->mh->output_lang, buf); + buffer_copy32(&m->mh->min_version, buf); + buffer_copy32(&m->mh->image_index, buf); + buffer_copy32(&m->mh->huff_rec_index, buf); + buffer_copy32(&m->mh->huff_rec_count, buf); + buffer_copy32(&m->mh->huff_table_offset, buf); + buffer_copy32(&m->mh->huff_table_length, buf); + buffer_copy32(&m->mh->exth_flags, buf); + buf->offset += 32; // 32 unknown bytes + buffer_copy32(&m->mh->unknown6, buf); + buffer_copy32(&m->mh->drm_offset, buf); + buffer_copy32(&m->mh->drm_count, buf); + buffer_copy32(&m->mh->drm_size, buf); + buffer_copy32(&m->mh->drm_flags, buf); + buf->offset += 8; // 8 unknown bytes + buffer_copy16(&m->mh->first_text_index, buf); + buffer_copy16(&m->mh->last_text_index, buf); + buffer_copy32(&m->mh->unknown9, buf); + buffer_copy32(&m->mh->fcis_index, buf); + buffer_copy32(&m->mh->fcis_count, buf); + buffer_copy32(&m->mh->flis_index, buf); + buffer_copy32(&m->mh->flis_count, buf); + buffer_copy32(&m->mh->unknown10, buf); + buffer_copy32(&m->mh->unknown11, buf); + buffer_copy32(&m->mh->srcs_index, buf); + 
buffer_copy32(&m->mh->srcs_count, buf); + buffer_copy32(&m->mh->unknown12, buf); + buffer_copy32(&m->mh->unknown13, buf); + buf->offset += 2; // 2 byte fill + buffer_copy16(&m->mh->extra_flags, buf); + buffer_copy32(&m->mh->ncx_index, buf); + buffer_copy32(&m->mh->unknown14, buf); + buffer_copy32(&m->mh->unknown15, buf); + buffer_copy32(&m->mh->datp_index, buf); + buffer_copy32(&m->mh->unknown16, buf); + buffer_copy32(&m->mh->unknown17, buf); + buffer_copy32(&m->mh->unknown18, buf); + buffer_copy32(&m->mh->unknown19, buf); + buffer_copy32(&m->mh->unknown20, buf); + if (buf->maxlen > buf->offset) { + buf->offset = buf->maxlen; + } + buf->maxlen = saved_maxlen; + return MOBI_SUCCESS; +} + + +// parse +int mobi_parse_record0(MOBIData *m, size_t seqnumber) { + MOBIBuffer *buf; + MOBIPdbRecord *record0; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + record0 = mobi_get_record_by_seqnumber(m, seqnumber); + if (record0 == NULL || record0->size == 0) { + printf("Record 0 not loaded\n"); + return MOBI_ERROR; + } + buf = buffer_init(record0->size); + if (buf == NULL) { + return MOBI_ERROR; + } + memcpy(buf->data, record0->data, record0->size); + m->rh = calloc(1, sizeof(MOBIRecord0Header)); + if (m->rh == NULL) { + printf("Memory allocation for record 0 header failed\n"); + return MOBI_ERROR; + } + // parse palmdoc header + m->rh->compression_type = buffer_get16(buf); + buf->offset += 2; // unused, 0 + m->rh->text_length = buffer_get32(buf); + m->rh->text_record_count = buffer_get16(buf); + m->rh->text_record_size = buffer_get16(buf); + m->rh->encryption_type = buffer_get16(buf); + m->rh->unknown1 = buffer_get16(buf); + if (strcmp(m->ph->type, "BOOK") == 0 && strcmp(m->ph->creator, "MOBI") == 0) { + // parse mobi header + mobi_parse_mobiheader(m, buf); + // parse exth header + mobi_parse_extheader(m, buf); + } + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_parse_huff(MOBIHuffCdic *huffcdic, MOBIPdbRecord *record) { + 
MOBIBuffer *buf; + char huff_magic[5]; + size_t header_length; + buf = buffer_init(record->size); + if (buf == NULL) { + return MOBI_ERROR; + } + memcpy(buf->data, record->data, record->size); + buffer_getstring(huff_magic, buf, 4); + header_length = buffer_get32(buf); + if (strncmp(huff_magic, HUFF_MAGIC, 4) != 0 || header_length < HUFF_HEADER_LEN) { + printf("HUFF wrong magic: %s\n", huff_magic); + buffer_free(buf); + return MOBI_ERROR; + } + size_t data1_offset = buffer_get32(buf); + size_t data2_offset = buffer_get32(buf); + // skip little-endian table offsets + buf->offset = data1_offset; + if (buf->offset + (256 * 4) > buf->maxlen) { + printf("HUFF data1 too short\n"); + buffer_free(buf); + return MOBI_ERROR; + } + // read 256 indices from data1 big-endian + for (int i = 0; i < 256; i++) { + huffcdic->table1[i] = buffer_get32(buf); + } + buf->offset = data2_offset; + if (buf->offset + (64 * 4) > buf->maxlen) { + printf("HUFF data2 too short\n"); + buffer_free(buf); + return MOBI_ERROR; + } + // read 32 mincode-maxcode pairs from data2 big-endian + uint32_t mincode, maxcode; + huffcdic->mincode_table[0] = 0; + huffcdic->maxcode_table[0] = 0xFFFFFFFF; + for (int i = 1; i < 33; i++) { + mincode = buffer_get32(buf); + maxcode = buffer_get32(buf); + huffcdic->mincode_table[i] = mincode << (32 - i); + huffcdic->maxcode_table[i] = ((maxcode + 1) << (32 - i)) - 1; + } + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_parse_cdic(MOBIHuffCdic *huffcdic, MOBIPdbRecord *record, int num) { + MOBIBuffer *buf; + char cdic_magic[5]; + size_t header_length, index_count, code_length; + buf = buffer_init(record->size); + if (buf == NULL) { + return MOBI_ERROR; + } + memcpy(buf->data, record->data, record->size); + buffer_getstring(cdic_magic, buf, 4); + header_length = buffer_get32(buf); + if (strncmp(cdic_magic, CDIC_MAGIC, 4) != 0 || header_length < CDIC_HEADER_LEN) { + printf("CDIC wrong magic: %s\n", cdic_magic); + buffer_free(buf); + return MOBI_ERROR; + } + // 
variables in huffcdic initialized to zero with calloc + // save initial count and length + index_count = buffer_get32(buf); + code_length = buffer_get32(buf); + if (huffcdic->code_length && huffcdic->code_length != code_length) { + printf("Warning: CDIC different code length %zu in record %i, previous was %zu\n", huffcdic->code_length, record->uid, code_length); + } + if (huffcdic->index_count && huffcdic->index_count != index_count) { + printf("Warning: CDIC different index count %zu in record %i, previous was %zu\n", huffcdic->index_count, record->uid, index_count); + } + huffcdic->code_length = code_length; + huffcdic->index_count = index_count; + if (index_count == 0) { + printf("CDIC index count is null"); + buffer_free(buf); + return MOBI_ERROR; + } + // allocate memory for symbol offsets if not already allocated + if (num == 0) { + huffcdic->symbol_offsets = malloc(index_count * sizeof(*huffcdic->symbol_offsets)); + if (huffcdic->symbol_offsets == NULL) { + printf("CDIC cannot allocate memory"); + buffer_free(buf); + return MOBI_ERROR; + } + } + index_count -= huffcdic->index_read; + // limit number of records read to code_length bits + if (index_count >> code_length) { + index_count = (1 << code_length); + } + if (buf->offset + (index_count * 2) > buf->maxlen) { + printf("CDIC indices data too short\n"); + buffer_free(buf); + free(huffcdic->symbol_offsets); + return MOBI_ERROR; + } + // read i * 2 byte big-endian indices + while (index_count--) { + huffcdic->symbol_offsets[huffcdic->index_read++] = buffer_get16(buf); + } + if (buf->offset + code_length > buf->maxlen) { + printf("CDIC dictionary data too short"); + free(huffcdic->symbol_offsets); + buffer_free(buf); + return MOBI_ERROR; + } + // copy pointer to data + huffcdic->symbols[num] = record->data + CDIC_HEADER_LEN; + // free buffer + buffer_free(buf); + return MOBI_SUCCESS; +} + +int mobi_parse_huffdic(MOBIData *m, MOBIHuffCdic *huffcdic) { + MOBIPdbRecord *curr; + int ret, i = 0; + if (m->mh == 
NULL || m->mh->huff_rec_index == NULL) { + printf("HUFF/CDIC records metadata not found in MOBI header\n"); + return MOBI_ERROR; + } + size_t huff_rec_index = *m->mh->huff_rec_index; + size_t huff_rec_count = *m->mh->huff_rec_count; + curr = mobi_get_record_by_seqnumber(m, huff_rec_index); + if (curr == NULL) { + printf("HUFF record not found\n"); + return MOBI_ERROR; + } + if (curr->size < HUFF_RECORD_MINSIZE) { + printf("HUFF record too short (%zu b)\n", curr->size); + return MOBI_ERROR; + } + ret = mobi_parse_huff(huffcdic, curr); + if (ret == MOBI_ERROR) { + printf("HUFF parsing failed\n"); + return MOBI_ERROR; + } + //huff_rec_index++; + curr = curr->next; + // allocate memory for symbols data in each CDIC record + huffcdic->symbols = malloc((huff_rec_count - 1) * sizeof(*huffcdic->symbols)); + // get following CDIC records + while (i < huff_rec_count - 1) { + ret = mobi_parse_cdic(huffcdic, curr, i++); + if (ret == MOBI_ERROR) { + printf("CDIC parsing failed\n"); + free(huffcdic->symbols); + return MOBI_ERROR; + } + curr = curr->next; + } + + return MOBI_SUCCESS; +} +int mobi_load_file(MOBIData *m, FILE *file) { + int ret; + if (m == NULL) { + printf("Mobi structure not initialized\n"); + return MOBI_ERROR; + } + ret = mobi_load_pdbheader(m, file); + + if (strcmp(m->ph->type, "BOOK") != 0 && strcmp(m->ph->type, "TEXt") != 0) { + printf("Unsupported file type: %s\n", m->ph->type); + return MOBI_ERROR; + } + + if (ret == MOBI_ERROR || m->ph->rec_count == 0) { + printf("No records found\n"); + return MOBI_ERROR; + } + ret = mobi_load_reclist(m, file); + if (ret == MOBI_ERROR) { + return MOBI_ERROR; + } + ret = mobi_load_recdata(m, file); + if (ret == MOBI_ERROR) { + return MOBI_ERROR; + } + ret = mobi_parse_record0(m, 0); + // if EXTH is loaded and use_kf8 flag is set parse KF8 record0 for joined mobi7/kf8 file + if (m->eh && m->use_kf8) { + int boundary_rec_number; + boundary_rec_number = mobi_get_kf8boundary(m); + if (boundary_rec_number >= 0) { + // it is a 
joint mobi7/kf8 file + m->next = mobi_init(); + // link pdb header and records data to kf8data structure + m->next->ph = m->ph; + m->next->rec = m->rec; + // close next loop + m->next->next = m; + ret = mobi_parse_record0(m->next, boundary_rec_number + 1); + mobi_swap_mobidata(m); + } + } + return ret; +} + +int mobi_load_filename(MOBIData *m, const char *path) { + FILE *file; + int ret; + file = fopen(path, "rb"); + ret = mobi_load_file(m, file); + fclose(file); + return ret; +} diff --git a/src/read.h b/src/read.h new file mode 100644 index 0000000..5baa6fd --- /dev/null +++ b/src/read.h @@ -0,0 +1,21 @@ +// +// read.h +// mobi +// +// Created by Bartek on 26.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. +// + +#ifndef mobi_read_h +#define mobi_read_h + +#include "mobi.h" +#include "memory.h" +#include "util.h" + +int mobi_load_pdbheader(MOBIData *m, FILE *file); +int mobi_load_reclist(MOBIData *m, FILE *file); +int mobi_load_recdata(MOBIData *m, FILE *file); +int mobi_load_rec(MOBIPdbRecord *rec, FILE *file); + +#endif diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..600e1e8 --- /dev/null +++ b/src/util.c @@ -0,0 +1,331 @@ +// +// util.c +// mobi +// +// Created by Bartek on 08.04.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+//
+
+#include "util.h"
+
+// Copy the book's full name from record 0 into `fullname`.
+//
+// m        - parsed document; the MOBI header supplies the name offset
+// fullname - destination; receives an empty string on any failure
+// len      - maximum number of characters to copy, terminator excluded
+//
+// At most `len` characters are copied and the result is always
+// NUL-terminated, so `fullname` must have room for len + 1 bytes (this is what
+// print_meta in tools/mobitool.c allocates). The copy never reads past the end
+// of record 0, even if the header declares a bogus offset.
+void mobi_get_fullname(MOBIData *m, char *fullname, size_t len) {
+    if (fullname == NULL) {
+        return;
+    }
+    fullname[0] = '\0';
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return;
+    }
+    MOBIPdbRecord *record0 = mobi_get_record_by_seqnumber(m, 0);
+    if (m->mh == NULL || m->mh->full_name_offset == NULL || record0 == NULL) {
+        return;
+    }
+    size_t name_offset = *m->mh->full_name_offset;
+    if (record0->data == NULL || name_offset >= record0->size) {
+        return;
+    }
+    // never read more than what is left in the record
+    size_t available = record0->size - name_offset;
+    size_t limit = (len < available) ? len : available;
+    const char *src = (const char *) record0->data + name_offset;
+    size_t i = 0;
+    while (i < limit && src[i] != '\0') {
+        fullname[i] = src[i];
+        i++;
+    }
+    fullname[i] = '\0';
+}
+
+// Return the first pdb record whose uid equals `uid`, or NULL if there is none
+// (or if `m` is NULL).
+MOBIPdbRecord * mobi_get_record_by_uid(MOBIData *m, size_t uid) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return NULL;
+    }
+    for (MOBIPdbRecord *curr = m->rec; curr != NULL; curr = curr->next) {
+        if (curr->uid == uid) {
+            return curr;
+        }
+    }
+    return NULL;
+}
+
+// Return the pdb record at zero-based position `num` in the record list, or
+// NULL if the list is shorter than that (or if `m` is NULL).
+MOBIPdbRecord * mobi_get_record_by_seqnumber(MOBIData *m, size_t num) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return NULL;
+    }
+    // index is size_t like `num`, so the comparison is never signed/unsigned
+    size_t i = 0;
+    for (MOBIPdbRecord *curr = m->rec; curr != NULL; curr = curr->next) {
+        if (i++ == num) {
+            return curr;
+        }
+    }
+    return NULL;
+}
+
+// Unlink and free the pdb record at zero-based position `num`.
+//
+// Returns MOBI_ERROR when `m` is NULL or holds no records at all.
+// Returns MOBI_SUCCESS otherwise, including when `num` is past the end of the
+// list and nothing was deleted (behaviour kept from the original code).
+int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (m->rec == NULL) {
+        return MOBI_ERROR;
+    }
+    MOBIPdbRecord *prev = NULL;
+    MOBIPdbRecord *curr = m->rec;
+    for (size_t i = 0; curr != NULL; i++) {
+        if (i == num) {
+            if (prev == NULL) {
+                // deleting the head of the list
+                m->rec = curr->next;
+            } else {
+                prev->next = curr->next;
+            }
+            free(curr->data);
+            free(curr);
+            return MOBI_SUCCESS;
+        }
+        prev = curr;
+        curr = curr->next;
+    }
+    return MOBI_SUCCESS;
+}
+
+// Return the first EXTH record whose uid equals `uid`, or NULL if there is
+// none (or if `m` is NULL).
+MOBIExtHeader * mobi_get_exthtag_by_uid(MOBIData *m, size_t uid) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return NULL;
+    }
+    for (MOBIExtHeader *curr = m->eh; curr != NULL; curr = curr->next) {
+        if (curr->uid == uid) {
+            return curr;
+        }
+    }
+    return NULL;
+}
+
+// Decode the variable-width integer that ends at byte `psize - 1` of the
+// record data.
+//
+// Bytes are consumed backwards; each contributes its low 7 bits, least
+// significant group first. Decoding stops at a byte with bit 0x80 set, after
+// 28 bits, or at the start of the record.
+//
+// Returns 0 when there is nothing to read (`psize` is 0 or lies outside the
+// record) instead of reading out of bounds.
+size_t sizeof_trailing_entry(MOBIPdbRecord *record, size_t psize) {
+    if (record == NULL || record->data == NULL ||
+        psize == 0 || psize > record->size) {
+        return 0;
+    }
+    const unsigned char *data = (const unsigned char *) record->data;
+    size_t bitpos = 0;
+    size_t result = 0;
+    while (1) {
+        uint8_t v = data[psize - 1];
+        result |= (size_t) (v & 0x7F) << bitpos;
+        bitpos += 7;
+        psize -= 1;
+        if ((v & 0x80) != 0 || (bitpos >= 28) || (psize == 0)) {
+            return result;
+        }
+    }
+}
+
+// Compute how many bytes at the end of `record` are extra (non-text) data.
+//
+// Every set bit of `flags` above bit 0 stands for one trailing entry whose size
+// is decoded with sizeof_trailing_entry(). If bit 0 is set, the low two bits of
+// the byte preceding those entries, plus one, are added as well.
+//
+// The result is clamped to record->size so that callers can safely subtract it
+// from the record size even for malformed records.
+size_t mobi_get_record_extrasize(MOBIPdbRecord *record, uint16_t flags) {
+    if (record == NULL || record->data == NULL) {
+        return 0;
+    }
+    const unsigned char *data = (const unsigned char *) record->data;
+    const size_t size = record->size;
+    size_t num = 0;
+    int mb_flag = flags & 1;
+    flags >>= 1;
+    while (flags) {
+        if (flags & 1) {
+            if (num >= size) {
+                // declared entries already cover the whole record
+                return size;
+            }
+            num += sizeof_trailing_entry(record, size - num);
+        }
+        flags >>= 1;
+    }
+    if (mb_flag && num < size) {
+        num += (data[size - num - 1] & 0x3) + 1;
+    }
+    return (num > size) ? size : num;
+}
+
+// wrapper for mobi_get_rawml and mobi_dump_rawml
+//
+// Decompress all text records and either write them to `file` (dump != 0) or
+// store them in `text` (dump == 0).
+//
+// m    - parsed document
+// text - destination used when dump == 0; it is written from its start and
+//        NUL-terminated, so it must have room for len + 1 bytes
+// file - open stream used when dump != 0
+// len  - maximum number of text bytes accepted when dump == 0
+// dump - selects the destination
+//
+// Returns MOBI_SUCCESS, or MOBI_ERROR on missing input, a missing text record,
+// unknown compression, a too small text buffer or a failed write.
+int mobi_decompress_content(MOBIData *m, char *text, FILE *file, size_t len, int dump) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (dump ? (file == NULL) : (text == NULL)) {
+        printf("Output destination not provided\n");
+        return MOBI_ERROR;
+    }
+    size_t offset = 0;
+    // check if we want to parse kf8 part of joint file
+    if (m->use_kf8 && m->next != NULL) {
+        int kf8_offset = mobi_get_kf8boundary(m->next);
+        if (kf8_offset >= 0) {
+            // kf8 boundary + 1 * record0
+            offset = (size_t) kf8_offset + 1;
+        }
+    }
+    if (m->rh == NULL || m->rh->text_record_count == 0) {
+        printf("Text records not found in MOBI header\n");
+        return MOBI_ERROR;
+    }
+    size_t text_rec_count = m->rh->text_record_count;
+    uint16_t compression_type = m->rh->compression_type;
+    // check for extra data at the end of text files
+    uint16_t extra_flags = 0;
+    if (m->mh && m->mh->extra_flags) {
+        extra_flags = *m->mh->extra_flags;
+    }
+    // get first text record
+    MOBIPdbRecord *curr = mobi_get_record_by_seqnumber(m, 1 + offset);
+    MOBIHuffCdic *huffcdic = NULL;
+    if (compression_type == RECORD0_HUFF_COMPRESSION) {
+        // load huff/cdic tables
+        huffcdic = mobi_init_huffcdic(m);
+        if (huffcdic == NULL) {
+            printf("HUFF/CDIC tables not loaded\n");
+            return MOBI_ERROR;
+        }
+    }
+    char decompressed[2 * RECORD0_RECORD_SIZE_MAX + 32];
+    size_t text_length = 0;
+    int ret = MOBI_ERROR;
+    while (text_rec_count--) {
+        if (curr == NULL) {
+            // the header declares more text records than the file contains
+            printf("Text record not found\n");
+            goto cleanup;
+        }
+        size_t extra_size = 0;
+        if (extra_flags) {
+            extra_size = mobi_get_record_extrasize(curr, extra_flags);
+        }
+        if (extra_size > curr->size) {
+            printf("Wrong size of extra data in text record\n");
+            goto cleanup;
+        }
+        size_t record_size = curr->size - extra_size;
+        size_t d_size;
+        switch (compression_type) {
+            case RECORD0_NO_COMPRESSION:
+                // no compression: plain copy of the record without its
+                // trailing extra data
+                if (record_size > sizeof(decompressed)) {
+                    printf("Text record too large\n");
+                    goto cleanup;
+                }
+                memcpy(decompressed, curr->data, record_size);
+                d_size = record_size;
+                break;
+            case RECORD0_PALMDOC_COMPRESSION:
+                // palmdoc lz77 compression
+                d_size = mobi_decompress_lz77(decompressed, curr->data, record_size);
+                break;
+            case RECORD0_HUFF_COMPRESSION:
+                // mobi huffman compression
+                d_size = mobi_decompress_huffman(decompressed, curr->data, record_size, huffcdic, 0);
+                if (d_size > RECORD0_RECORD_SIZE_MAX) {
+                    d_size = RECORD0_RECORD_SIZE_MAX;
+                }
+                break;
+            default:
+                printf("Unknown compression type\n");
+                goto cleanup;
+        }
+        curr = curr->next;
+        if (dump) {
+            if (fwrite(decompressed, 1, d_size, file) != d_size) {
+                printf("Writing text record failed\n");
+                goto cleanup;
+            }
+            text_length += d_size;
+        } else {
+            if (d_size > len || text_length > len - d_size) {
+                printf("Text buffer too small\n");
+                goto cleanup;
+            }
+            // append at the known end instead of rescanning text each time
+            memcpy(text + text_length, decompressed, d_size);
+            text_length += d_size;
+            text[text_length] = '\0';
+        }
+    }
+    ret = MOBI_SUCCESS;
+cleanup:
+    // free huff/cdic tables
+    if (huffcdic != NULL) {
+        mobi_free_huffcdic(huffcdic);
+    }
+    return ret;
+}
+
+// copy raw text to text buffer
+//
+// Fails with MOBI_ERROR if `m`, its record 0 header or `text` is missing, or
+// if `len` is smaller than the text length declared in the record 0 header.
+// `text` must have room for len + 1 bytes, see mobi_decompress_content().
+int mobi_get_rawml(MOBIData *m, char *text, size_t len) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    if (m->rh == NULL) {
+        printf("Text records not found in MOBI header\n");
+        return MOBI_ERROR;
+    }
+    if (text == NULL) {
+        printf("Text buffer not provided\n");
+        return MOBI_ERROR;
+    }
+    if (m->rh->text_length > len) {
+        printf("Text buffer smaller then text size declared in record0 header\n");
+        return MOBI_ERROR;
+    }
+    text[0] = '\0';
+    return mobi_decompress_content(m, text, NULL, len, 0);
+}
+
+// dump raw text records to open file descriptor
+//
+// Returns MOBI_ERROR if `file` is NULL or decompression fails.
+int mobi_dump_rawml(MOBIData *m, FILE *file) {
+    if (file == NULL) {
+        printf("File descriptor is not valid\n");
+        return MOBI_ERROR;
+    }
+    return mobi_decompress_content(m, NULL, file, 0, 1);
+}
+
+// return kf8 boundary record sequential number or -1 if no such record
+//
+// The EXTH record MOBI_EXTH_KF8BOUNDARY holds a 32-bit big-endian number; the
+// record just before that number must start with the bytes "BOUNDARY" for the
+// number to be accepted.
+int mobi_get_kf8boundary(MOBIData *m) {
+    if (m == NULL) {
+        printf("Mobi structure not initialized\n");
+        return -1;
+    }
+    MOBIExtHeader *exth_tag = mobi_get_exthtag_by_uid(m, MOBI_EXTH_KF8BOUNDARY);
+    if (exth_tag == NULL || exth_tag->data == NULL || exth_tag->size < 4) {
+        return -1;
+    }
+    // decode byte by byte: no unaligned access and no host endianness check
+    const unsigned char *raw = (const unsigned char *) exth_tag->data;
+    uint32_t rec_number = ((uint32_t) raw[0] << 24) | ((uint32_t) raw[1] << 16) |
+                          ((uint32_t) raw[2] << 8) | (uint32_t) raw[3];
+    // 0 would wrap on decrement; larger values do not fit the int return type
+    if (rec_number == 0 || rec_number > 0x7fffffff) {
+        return -1;
+    }
+    rec_number--;
+    MOBIPdbRecord *record = mobi_get_record_by_seqnumber(m, rec_number);
+    if (record != NULL && record->data != NULL && record->size >= 8) {
+        // record data is not guaranteed to be NUL-terminated, so compare
+        // exactly the 8 marker bytes
+        if (memcmp(record->data, "BOUNDARY", 8) == 0) {
+            return (int) rec_number;
+        }
+    }
+    return -1;
+}
+
+// Exchange the parsed headers (record 0, MOBI and EXTH) between `m` and
+// `m->next`. Returns MOBI_ERROR if either structure is missing.
+int mobi_swap_mobidata(MOBIData *m) {
+    if (m == NULL || m->next == NULL) {
+        printf("Mobi structure not initialized\n");
+        return MOBI_ERROR;
+    }
+    MOBIData *other = m->next;
+    MOBIRecord0Header *rh = m->rh;
+    MOBIMobiHeader *mh = m->mh;
+    MOBIExtHeader *eh = m->eh;
+    m->rh = other->rh;
+    m->mh = other->mh;
+    m->eh = other->eh;
+    other->rh = rh;
+    other->mh = mh;
+    other->eh = eh;
+    return MOBI_SUCCESS;
+}
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..61b8e8f
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,17 @@
+//
+// util.h
+// mobi
+//
+// 
Created by Bartek on 08.04.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_util_h
+#define mobi_util_h
+
+#include "mobi.h"
+#include "memory.h"
+
+int mobi_delete_record_by_seqnumber(MOBIData *m, size_t num);
+int mobi_swap_mobidata(MOBIData *m);
+#endif
diff --git a/src/write.c b/src/write.c
new file mode 100644
index 0000000..050b708
--- /dev/null
+++ b/src/write.c
@@ -0,0 +1,149 @@
+//
+// write.c
+// mobi
+//
+// Created by Bartek on 25.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "write.h"
+
+// Build the Palm database header for a one-record file named "TITLE".
+// Returns a new buffer owned by the caller, or NULL if allocation failed.
+MOBIBuffer * serialize_palmdb_header(void) {
+    char title[PALMDB_NAME_SIZE_MAX] = "TITLE";
+    size_t len = strlen(title);
+    // both timestamps are "now" shifted by EPOCH_MAC_DIFF
+    uint32_t curtime = (uint32_t)(time(NULL) + EPOCH_MAC_DIFF);
+    uint32_t uid = 0xff;
+    uint16_t rec_count = 1;
+    MOBIBuffer *buf = buffer_init(PALMDB_HEADER_LEN);
+    if (buf == NULL) {
+        return NULL;
+    }
+    buffer_addstring(buf, title);
+    // pad the name field to its fixed width
+    buffer_addzeros(buf, PALMDB_NAME_SIZE_MAX - len);
+    buffer_add16(buf, PALMDB_ATTRIBUTE_DEFAULT);
+    buffer_add16(buf, PALMDB_VERSION_DEFAULT);
+    buffer_add32(buf, curtime); // ctime
+    buffer_add32(buf, curtime); // mtime
+    buffer_add32(buf, 0); // btime
+    buffer_add32(buf, PALMDB_MODNUM_DEFAULT);
+    buffer_add32(buf, PALMDB_APPINFO_DEFAULT);
+    buffer_add32(buf, PALMDB_SORTINFO_DEFAULT);
+    buffer_addstring(buf, PALMDB_TYPE_DEFAULT);
+    buffer_addstring(buf, PALMDB_CREATOR_DEFAULT);
+    buffer_add32(buf, uid);
+    buffer_add32(buf, PALMDB_NEXTREC_DEFAULT);
+    buffer_add16(buf, rec_count);
+    return buf;
+}
+
+// Build the record 0 header for an empty, uncompressed, unencrypted text.
+// Returns a new buffer owned by the caller, or NULL if allocation failed.
+MOBIBuffer * serialize_record0_header(void) {
+    uint32_t text_length = 0;
+    uint16_t record_count = 0;
+    MOBIBuffer *buf = buffer_init(RECORD0_HEADER_LEN);
+    if (buf == NULL) {
+        return NULL;
+    }
+    buffer_add16(buf, RECORD0_NO_COMPRESSION);
+    buffer_add16(buf, 0);
+    buffer_add32(buf, text_length);
+    buffer_add16(buf, record_count);
+    buffer_add16(buf, RECORD0_RECORD_SIZE_MAX);
+    buffer_add16(buf, RECORD0_NO_ENCRYPTION);
+    buffer_add16(buf, 0);
+    return buf;
+}
+
+// Write the used part of `buf` to `file` (if a file is given) and free the
+// buffer. A NULL buffer is ignored, so the result of a failed serialize_*()
+// call can be passed in safely.
+void buffer_output(FILE *file, MOBIBuffer *buf) {
+    if (buf == NULL) {
+        return;
+    }
+    if (file) {
+        if (fwrite(buf->data, 1, buf->offset, file) != buf->offset) {
+            printf("Writing buffer failed\n");
+        }
+        printf("Buffer length %zu bytes\n", buf->offset);
+    }
+    buffer_free(buf);
+}
+
+// Allocate a test pdb record whose data is the string "test", zero padded to
+// RECORD0_RECORD_SIZE_MAX bytes. The caller frees record->data and the record.
+// Returns NULL if either allocation fails.
+MOBIPdbRecord * build_pdbrecord(size_t offset) {
+    MOBIPdbRecord *record = malloc(sizeof(*record));
+    if (record == NULL) {
+        return NULL;
+    }
+    record->data = malloc(RECORD0_RECORD_SIZE_MAX);
+    if (record->data == NULL) {
+        free(record);
+        return NULL;
+    }
+    // strncpy zero-fills the rest of the buffer, so all of data is initialized
+    strncpy(record->data, "test", RECORD0_RECORD_SIZE_MAX);
+    record->offset = offset;
+    // NOTE(review): size is set to the offset value, as in the original code;
+    // this looks like a placeholder - confirm the intended record size
+    record->size = offset;
+    record->attributes = 0;
+    record->uid = 0;
+    record->next = NULL;
+    return record;
+}
+
+// Serialize the record info entry of `rec`: 32-bit offset followed by the
+// 32-bit uid (8 bytes). The buffer is allocated with 2 spare bytes so that
+// write_mobi() can append its 2 zero bytes without touching buffer internals.
+// Returns NULL if `rec` is NULL or allocation failed.
+MOBIBuffer * serialize_record_info(MOBIPdbRecord *rec) {
+    if (rec == NULL) {
+        return NULL;
+    }
+    MOBIBuffer *buf = buffer_init(8 + 2);
+    if (buf == NULL) {
+        return NULL;
+    }
+    buffer_add32(buf, (uint32_t) rec->offset);
+    //skip attributes, always 0;
+    buffer_add32(buf, rec->uid);
+    return buf;
+}
+
+// Serialize the data of `rec` as a string into a new buffer of
+// RECORD0_RECORD_SIZE_MAX bytes. Returns NULL if `rec` has no data or
+// allocation failed.
+MOBIBuffer * serialize_pdbrecord(MOBIPdbRecord *rec) {
+    if (rec == NULL || rec->data == NULL) {
+        return NULL;
+    }
+    MOBIBuffer *buf = buffer_init(RECORD0_RECORD_SIZE_MAX);
+    if (buf) {
+        buffer_addstring(buf, rec->data);
+    }
+    return buf;
+}
+
+// Serialize the 4 bytes that end the file: 0xe9 0x8e 0x0d 0x0a.
+// Returns NULL if allocation failed.
+MOBIBuffer * serialize_file_end(void) {
+    // explicit casts: 0xe9 and 0x8e do not fit a signed char
+    char end[] = { (char) 0xe9, (char) 0x8e, 0x0d, 0x0a };
+    MOBIBuffer *buf = buffer_init(4);
+    if (buf) {
+        buffer_addraw(buf, end, 4);
+    }
+    return buf;
+}
+
+
+// Write a minimal test file: palmdb header, one record info entry followed by
+// 2 zero bytes, record 0 header, the test record and the end marker.
+// Stops at the first failed allocation; the output file is always closed.
+void write_mobi(void) {
+    MOBIPdbRecord *rec = NULL;
+    MOBIBuffer *buf;
+    FILE *file = fopen("/Users/baf/src/mobi_test/test.mobi","wb");
+    if (file == NULL) {
+        printf("Could not open output file\n");
+        return;
+    }
+    buf = serialize_palmdb_header();
+    if (buf == NULL) {
+        goto done;
+    }
+    printf("Writing palmdb header\n");
+    buffer_output(file, buf);
+    rec = build_pdbrecord(PALMDB_HEADER_LEN + PDB_RECORD_INFO_SIZE + 2);
+    if (rec == NULL) {
+        printf("Memory allocation for pdb record failed\n");
+        goto done;
+    }
+    buf = serialize_record_info(rec);
+    if (buf == NULL) {
+        goto done;
+    }
+    // the buffer already has room for these 2 bytes
+    buffer_addzeros(buf, 2);
+    printf("Writing record info + 2 zeros\n");
+    buffer_output(file, buf);
+    buf = serialize_record0_header();
+    if (buf == NULL) {
+        goto done;
+    }
+    printf("Writing record0 header\n");
+    buffer_output(file, buf);
+    buf = serialize_pdbrecord(rec);
+    if (buf == NULL) {
+        goto done;
+    }
+    printf("Writing pdb record\n");
+    buffer_output(file, buf);
+    buf = serialize_file_end();
+    if (buf == NULL) {
+        goto done;
+    }
+    printf("Writing 4 end chars\n");
+    buffer_output(file, buf);
+done:
+    if (rec != NULL) {
+        free(rec->data);
+        free(rec);
+    }
+    fclose(file);
+}
diff --git a/src/write.h b/src/write.h
new file mode 100644
index 0000000..5b96c3b
--- /dev/null
+++ b/src/write.h
@@ -0,0 +1,20 @@
+//
+// write.h
+// mobi
+//
+// Created by Bartek on 25.03.14.
+// Copyright (c) 2014 Bartek. All rights reserved.
+//
+
+#ifndef mobi_write_h
+#define mobi_write_h
+
+#include "mobi.h"
+
+__attribute__((visibility("hidden"))) MOBIBuffer * buffer_init(size_t len);
+__attribute__((visibility("hidden"))) void buffer_add8(MOBIBuffer *p, uint8_t data);
+__attribute__((visibility("hidden"))) void buffer_add16(MOBIBuffer *p, uint16_t data);
+__attribute__((visibility("hidden"))) void buffer_add32(MOBIBuffer *p, uint32_t data);
+__attribute__((visibility("hidden"))) void buffer_addstring(MOBIBuffer *p, char *str);
+__attribute__((visibility("hidden"))) void buffer_free(MOBIBuffer *p);
+#endif
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644
index 0000000..07517c2
--- /dev/null
+++ b/tools/Makefile.am
@@ -0,0 +1,12 @@
+# tools
+
+# what flags you want to pass to the C compiler & linker
+AM_CFLAGS = --pedantic -Wall -std=gnu99 -O2
+AM_LDFLAGS =
+
+# this lists the binaries to produce, the (non-PHONY, binary) targets in
+# the previous manual Makefile
+bin_PROGRAMS = mobitool
+man_MANS = mobitool.1
+mobitool_SOURCES = mobitool.c
+mobitool_LDADD = $(top_builddir)/src/libmobi.la
diff --git a/tools/mobitool.1 b/tools/mobitool.1
new file mode 100644
index 0000000..a8dd9a3
--- /dev/null
+++ b/tools/mobitool.1
@@ -0,0 +1,79 @@
+.\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples.
+.\"See Also: +.\"man mdoc.samples for a complete listing of options +.\"man mdoc for the short list of editing options +.\"/usr/share/misc/mdoc.template +.Dd 26.03.14 \" DATE +.Dt test 1 \" Program name and manual section number +.Os Darwin +.Sh NAME \" Section Header - required - don't modify +.Nm test, +.\" The following lines are read in generating the apropos(man -k) database. Use only key +.\" words here as the database is built based on the words here and in the .ND line. +.Nm Other_name_for_same_program(), +.Nm Yet another name for the same program. +.\" Use .Nm macro to designate other names for the documented program. +.Nd This line parsed for whatis database. +.Sh SYNOPSIS \" Section Header - required - don't modify +.Nm +.Op Fl abcd \" [-abcd] +.Op Fl a Ar path \" [-a path] +.Op Ar file \" [file] +.Op Ar \" [file ...] +.Ar arg0 \" Underlined argument - use .Ar anywhere to underline +arg2 ... \" Arguments +.Sh DESCRIPTION \" Section Header - required - don't modify +Use the .Nm macro to refer to your program throughout the man page like such: +.Nm +Underlining is accomplished with the .Ar macro like this: +.Ar underlined text . 
+.Pp \" Inserts a space +A list of items with descriptions: +.Bl -tag -width -indent \" Begins a tagged list +.It item a \" Each item preceded by .It macro +Description of item a +.It item b +Description of item b +.El \" Ends the list +.Pp +A list of flags and their descriptions: +.Bl -tag -width -indent \" Differs from above in tag removed +.It Fl a \"-a flag as a list item +Description of -a flag +.It Fl b +Description of -b flag +.El \" Ends the list +.Pp +.\" .Sh ENVIRONMENT \" May not be needed +.\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1 +.\" .It Ev ENV_VAR_1 +.\" Description of ENV_VAR_1 +.\" .It Ev ENV_VAR_2 +.\" Description of ENV_VAR_2 +.\" .El +.Sh FILES \" File used or created by the topic of the man page +.Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact +.It Pa /usr/share/file_name +FILE_1 description +.It Pa /Users/joeuser/Library/really_long_file_name +FILE_2 description +.El \" Ends the list +.\" .Sh DIAGNOSTICS \" May not be needed +.\" .Bl -diag +.\" .It Diagnostic Tag +.\" Diagnostic information here. +.\" .It Diagnostic Tag +.\" Diagnostic information here. +.\" .El +.Sh SEE ALSO +.\" List links in ascending order by section, alphabetically within a section. +.\" Please do not reference files that do not exist without filing a bug report +.Xr a 1 , +.Xr b 1 , +.Xr c 1 , +.Xr a 2 , +.Xr b 2 , +.Xr a 3 , +.Xr b 3 +.\" .Sh BUGS \" Document known, unremedied bugs +.\" .Sh HISTORY \" Document history if command behaves in a unique manner \ No newline at end of file diff --git a/tools/mobitool.c b/tools/mobitool.c new file mode 100644 index 0000000..6ad9e0d --- /dev/null +++ b/tools/mobitool.c @@ -0,0 +1,221 @@ +// +// test.c +// mobi +// +// Created by Bartek on 25.03.14. +// Copyright (c) 2014 Bartek. All rights reserved. 
+// + +#include +#include "../src/mobi.h" +//#include + +// FIXME: testing +#define DUMP_REC_OPT 0; +#define LOADFILENAME 1 + +void print_meta(MOBIData *m) { + if (m->mh && m->mh->full_name_offset && m->mh->full_name_length) { + char *full_name; + size_t len = *m->mh->full_name_length; + full_name = malloc(len + 1); + mobi_get_fullname(m, full_name, len); + printf("full name: %s\n", full_name); + free(full_name); + } + printf("name: %s\n", m->ph->name); + printf("attributes: %hu\n", m->ph->attributes); + printf("version: %hu\n", m->ph->version); + printf("ctime: %u\n", m->ph->ctime); + printf("mtime: %u\n", m->ph->mtime); + printf("mtime: %u\n", m->ph->mtime); + printf("btime: %u\n", m->ph->btime); + printf("mod_num: %u\n", m->ph->mod_num); + printf("appinfo_offset: %u\n", m->ph->appinfo_offset); + printf("sortinfo_offset: %u\n", m->ph->sortinfo_offset); + printf("type: %s\n", m->ph->type); + printf("creator: %s\n", m->ph->creator); + printf("uid: %u\n", m->ph->uid); + printf("next_rec: %u\n", m->ph->next_rec); + printf("rec_count: %u\n", m->ph->rec_count); + if (m->rh) { + printf("\nRecord 0:\n"); + printf("compresion type: %u\n", m->rh->compression_type); + printf("text length: %u\n", m->rh->text_length); + printf("record count: %u\n", m->rh->text_record_count); + printf("record size: %u\n", m->rh->text_record_size); + printf("encryption type: %u\n", m->rh->encryption_type); + printf("unknown: %u\n", m->rh->unknown1); + } + if (m->mh) { + printf("identifier: %s\n", m->mh->mobi_magic); + if(m->mh->header_length) { printf("header length: %u\n", *m->mh->header_length); } + if(m->mh->mobi_type) { printf("mobi type: %u\n", *m->mh->mobi_type); } + if(m->mh->text_encoding) { printf("text encoding: %u\n", *m->mh->text_encoding); } + if(m->mh->uid) { printf("unique id: %u\n", *m->mh->uid); } + if(m->mh->file_version) { printf("file version: %u\n", *m->mh->file_version); } + if(m->mh->orth_index) { printf("orth index: %u\n", *m->mh->orth_index); } + if(m->mh->infl_index) { 
printf("infl index: %u\n", *m->mh->infl_index); } + if(m->mh->names_index) { printf("names index: %u\n", *m->mh->names_index); } + if(m->mh->keys_index) { printf("keys index: %u\n", *m->mh->keys_index); } + if(m->mh->extra0_index) { printf("extra0 index: %u\n", *m->mh->extra0_index); } + if(m->mh->extra1_index) { printf("extra1 index: %u\n", *m->mh->extra1_index); } + if(m->mh->extra2_index) { printf("extra2 index: %u\n", *m->mh->extra2_index); } + if(m->mh->extra3_index) { printf("extra3 index: %u\n", *m->mh->extra3_index); } + if(m->mh->extra4_index) { printf("extra4 index: %u\n", *m->mh->extra4_index); } + if(m->mh->extra5_index) { printf("extra5 index: %u\n", *m->mh->extra5_index); } + if(m->mh->non_text_index) { printf("non text index: %u\n", *m->mh->non_text_index); } + if(m->mh->full_name_offset) { printf("full name offset: %u\n", *m->mh->full_name_offset); } + if(m->mh->full_name_length) { printf("full name length: %u\n", *m->mh->full_name_length); } + if(m->mh->locale) { printf("locale: %u\n", *m->mh->locale); } + if(m->mh->input_lang) { printf("input lang: %u\n", *m->mh->input_lang); } + if(m->mh->output_lang) { printf("outpu lang: %u\n", *m->mh->output_lang); } + if(m->mh->min_version) { printf("minimal version: %u\n", *m->mh->min_version); } + if(m->mh->image_index) { printf("first image index: %u\n", *m->mh->image_index); } + if(m->mh->huff_rec_index) { printf("huffman record offset: %u\n", *m->mh->huff_rec_index); } + if(m->mh->huff_rec_count) { printf("huffman record count: %u\n", *m->mh->huff_rec_count); } + if(m->mh->huff_table_offset) { printf("huffman table offset: %u\n", *m->mh->huff_table_offset); } + if(m->mh->huff_table_length) { printf("huffman table length: %u\n", *m->mh->huff_table_length); } + if(m->mh->exth_flags) { printf("EXTH flags: %u\n", *m->mh->exth_flags); } + if(m->mh->unknown6) { printf("unknown: %u\n", *m->mh->unknown6); } + if(m->mh->drm_offset) { printf("drm offset: %u\n", *m->mh->drm_offset); } + if(m->mh->drm_size) { 
printf("drm size: %u\n", *m->mh->drm_size); } + if(m->mh->drm_flags) { printf("drm flags: %u\n", *m->mh->drm_flags); } + if(m->mh->first_text_index) { printf("first text index: %u\n", *m->mh->first_text_index); } + if(m->mh->last_text_index) { printf("last text index: %u\n", *m->mh->last_text_index); } + if(m->mh->unknown9) { printf("unknown: %u\n", *m->mh->unknown9); } + if(m->mh->fcis_index) { printf("FCIS index: %u\n", *m->mh->fcis_index); } + if(m->mh->fcis_count) { printf("FCIS count: %u\n", *m->mh->fcis_count); } + if(m->mh->flis_index) { printf("FLIS index: %u\n", *m->mh->flis_index); } + if(m->mh->flis_count) { printf("FLIS count: %u\n", *m->mh->flis_count); } + if(m->mh->unknown10) { printf("unknown: %u\n", *m->mh->unknown10); } + if(m->mh->unknown11) { printf("unknown: %u\n", *m->mh->unknown11); } + if(m->mh->srcs_index) { printf("SRCS index: %u\n", *m->mh->srcs_index); } + if(m->mh->srcs_count) { printf("SRCS count: %u\n", *m->mh->srcs_count); } + if(m->mh->unknown12) { printf("unknown: %u\n", *m->mh->unknown12); } + if(m->mh->unknown13) { printf("unknown: %u\n", *m->mh->unknown13); } + if(m->mh->extra_flags) { printf("extra record flags: %u\n", *m->mh->extra_flags); } + if(m->mh->ncx_index) { printf("NCX offset: %u\n", *m->mh->ncx_index); } + if(m->mh->unknown14) { printf("unknown: %u\n", *m->mh->unknown14); } + if(m->mh->unknown15) { printf("unknown: %u\n", *m->mh->unknown15); } + if(m->mh->datp_index) { printf("DATP index: %u\n", *m->mh->datp_index); } + if(m->mh->unknown16) { printf("unknown: %u\n", *m->mh->unknown16); } + if(m->mh->unknown17) { printf("unknown: %u\n", *m->mh->unknown17); } + if(m->mh->unknown18) { printf("unknown: %u\n", *m->mh->unknown18); } + if(m->mh->unknown19) { printf("unknown: %u\n", *m->mh->unknown19); } + if(m->mh->unknown20) { printf("unknown: %u\n", *m->mh->unknown20); } + } +}
+
+/**
+ * Print every record of the list that starts at m->eh to stdout.
+ *
+ * For each node the output line contains its uid, its payload rendered as a
+ * string, the leading payload bytes (at most four) decoded as a big-endian
+ * unsigned integer, and the payload size. Nothing is printed when m->eh is
+ * NULL.
+ *
+ * @param m document whose m->eh list is printed
+ */
+void print_exth(MOBIData *m) {
+    for (MOBIExtHeader *curr = m->eh; curr != NULL; curr = curr->next) {
+        /* One extra zeroed byte keeps the copy NUL-terminated for "%s". */
+        char *str = calloc(1, curr->size + 1);
+        if (str == NULL) {
+            printf("Memory allocation failed\n");
+            return;
+        }
+        uint32_t val = 0;
+        if (curr->data != NULL && curr->size > 0) {
+            memcpy(str, curr->data, curr->size);
+            /* Decode byte by byte instead of dereferencing a casted
+             * uint32_t pointer: this never reads past curr->size, has no
+             * alignment requirement and yields the big-endian value on any
+             * host without a separate byte swap. */
+            const unsigned char *bytes = (const unsigned char *) curr->data;
+            size_t count = curr->size < 4 ? curr->size : 4;
+            for (size_t i = 0; i < count; i++) {
+                val = (val << 8) | bytes[i];
+            }
+        }
+        printf("id: %i\tval: %s (%u)\tsize: %zu\n", curr->uid, str, val, curr->size);
+        free(str);
+    }
+}
+
+/**
+ * Print offset, size, attributes and uid of every record in the list that
+ * starts at m->rec, one field per line, with an empty line after each record.
+ *
+ * @param m document whose m->rec list is printed
+ */
+void print_records_meta(MOBIData *m) {
+    for (MOBIPdbRecord *currec = m->rec; currec != NULL; currec = currec->next) {
+        printf("offset: %zu\n", currec->offset);
+        printf("size: %zu\n", currec->size);
+        printf("attributes: %hhu\n", currec->attributes);
+        printf("uid: %u\n", currec->uid);
+        printf("\n");
+    }
+}
+
+/**
+ * Write the data of every record in the m->rec list to its own file.
+ *
+ * Files are named "<filepath>part_<n>_uid_<uid>", where n is the zero-based
+ * position of the record in the list. A record whose file cannot be named,
+ * opened or fully written is reported on stdout and the loop moves on to the
+ * next record.
+ *
+ * @param m        document whose records are dumped
+ * @param filepath prefix prepended verbatim to each file name
+ */
+void dump_records(MOBIData *m, char *filepath) {
+    char name[FILENAME_MAX];
+    int i = 0;
+    for (MOBIPdbRecord *currec = m->rec; currec != NULL; currec = currec->next) {
+        /* The counter advances for skipped records too, so the number in a
+         * file name always matches the record's position in the list. */
+        int n = snprintf(name, sizeof(name), "%spart_%i_uid_%i", filepath, i++, currec->uid);
+        if (n < 0 || (size_t) n >= sizeof(name)) {
+            printf("File name too long, record skipped\n");
+            continue;
+        }
+        FILE *file = fopen(name, "wb");
+        if (file == NULL) {
+            printf("Could not open file for writing: %s\n", name);
+            continue;
+        }
+        if (fwrite(currec->data, 1, currec->size, file) != currec->size) {
+            printf("Writing failed: %s\n", name);
+        }
+        /* fclose flushes buffered data, so a failure here is a write error. */
+        if (fclose(file) != 0) {
+            printf("Closing failed: %s\n", name);
+        }
+    }
+}
+
+/**
+ * Open "<filepath>rawml" for binary writing and pass it to mobi_dump_rawml().
+ *
+ * @param m        document handed to mobi_dump_rawml()
+ * @param filepath prefix prepended verbatim to the output file name
+ * @return the value returned by mobi_dump_rawml(), or MOBI_ERROR when the
+ *         output file cannot be named, opened or closed
+ */
+int dump_rawml(MOBIData *m, char *filepath) {
+    char name[FILENAME_MAX];
+    int n = snprintf(name, sizeof(name), "%srawml", filepath);
+    if (n < 0 || (size_t) n >= sizeof(name)) {
+        printf("File name too long\n");
+        return MOBI_ERROR;
+    }
+    FILE *file = fopen(name, "wb");
+    if (file == NULL) {
+        printf("Could not open file for writing: %s\n", name);
+        return MOBI_ERROR;
+    }
+    int ret = mobi_dump_rawml(m, file);
+    if (fclose(file) != 0) {
+        printf("Closing failed: %s\n", name);
+        return MOBI_ERROR;
+    }
+    return ret;
+}
+
+/**
+ * Load a document from filename, print its metadata and dump its contents
+ * next to the input file.
+ *
+ * The output prefix is the part of filename up to and including the last
+ * '/', or an empty string when filename contains no '/'.
+ *
+ * @param filename path of the file passed to mobi_load_filename()
+ * @return 1 when initialisation fails, the path prefix does not fit the
+ *         local buffer or loading returns MOBI_ERROR; otherwise the value
+ *         returned by dump_rawml()
+ */
+int loadfilename(const char *filename) {
+    MOBIData *m = mobi_init();
+    if (m == NULL) {
+        printf("init failed\n");
+        return 1;
+    }
+    char filepath[FILENAME_MAX];
+    size_t dirlen = 0;
+    const char *p = strrchr(filename, '/');
+    if (p) {
+        /* Keep the trailing '/' so file names can be appended directly. */
+        dirlen = (size_t) (p - filename) + 1;
+    }
+    if (dirlen >= sizeof(filepath)) {
+        printf("File path too long\n");
+        mobi_free(m);
+        return 1;
+    }
+    memcpy(filepath, filename, dirlen);
+    filepath[dirlen] = '\0';
+    m->use_kf8 = MOBI_USE_KF7;
+    int ret = mobi_load_filename(m, filename);
+    /* NOTE(review): metadata is printed before the load result is checked,
+     * as in the original call order; presumably this shows whatever was
+     * parsed before a failure. Confirm against print_meta(). */
+    print_meta(m);
+    if (ret == MOBI_ERROR) {
+        mobi_free(m);
+        return 1;
+    }
+    print_exth(m);
+    print_records_meta(m);
+    int dump_rec_opt = DUMP_REC_OPT;
+    if (dump_rec_opt) {
+        dump_records(m, filepath);
+    }
+    ret = dump_rawml(m, filepath);
+    mobi_free(m);
+    return ret;
+}
+
+/**
+ * Program entry point: expects exactly one argument, the input file name.
+ *
+ * @return 1 on wrong usage or when the file name does not fit the local
+ *         buffer; otherwise the value returned by loadfilename()
+ */
+int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("usage: %s filename\n", argv[0]);
+        return 1;
+    }
+    char filename[FILENAME_MAX];
+    /* snprintf always NUL-terminates; an over-long argument is rejected
+     * instead of being passed on truncated or unterminated. */
+    int n = snprintf(filename, sizeof(filename), "%s", argv[1]);
+    if (n < 0 || (size_t) n >= sizeof(filename)) {
+        printf("File name too long\n");
+        return 1;
+    }
+    int command = LOADFILENAME;
+    int ret = 0;
+    switch (command) {
+        case LOADFILENAME:
+            ret = loadfilename(filename);
+            break;
+        default:
+            break;
+    }
+    return ret;
+}