forked from goldendict/goldendict
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmdictparser.hh
219 lines (182 loc) · 5.35 KB
/
mdictparser.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
// https://bitbucket.org/xwang/mdict-analysis
// https://github.com/zhansliu/writemdict/blob/master/fileformat.md
// Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser
//
// Copyright (C) 2012, 2013 Xiaoqiang Wang <xiaoqiangwang AT gmail DOT com>
// Copyright (C) 2013 Timon Wong <timon86.wang AT gmail DOT com>
// Copyright (C) 2015 Zhe Wang <0x1998 AT gmail DOT com>
//
// This program is a free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3 of the License.
//
// You can get a copy of GNU General Public License along this program
// But you can always get it from http://www.gnu.org/licenses/gpl.txt
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
#ifndef __MDICTPARSER_HH_INCLUDED__
#define __MDICTPARSER_HH_INCLUDED__
#include <string>
#include <vector>
#include <map>
#include <utility>
#include <QPointer>
#include <QFile>
namespace Mdict
{
// Bring the standard library names used by the declarations below into
// namespace Mdict (scoped to this namespace, not the global one).
using std::string;
using std::vector;
using std::pair;
using std::map;
/// A helper class that memory-maps a region of a QFile for the lifetime of
/// the object.
///
/// The constructor calls QFile::map( offset, size ) and stores the result;
/// the destructor calls QFile::unmap() on it when the stored address is
/// non-null. Only a reference to the QFile is kept, so the file object must
/// outlive this helper.
class ScopedMemMap
{
  QFile & file;    // file the mapping belongs to (not owned)
  uchar * address; // result of QFile::map(); null when mapping failed

  // Copying is disabled: a copy would carry the same mapped address and the
  // second destructor to run would unmap a region that was already released.
  // Declared private and intentionally left undefined.
  ScopedMemMap( ScopedMemMap const & );
  ScopedMemMap & operator=( ScopedMemMap const & );

public:
  /// Maps @p size bytes of @p file starting at @p offset.
  /// Check startAddress() for null to detect a failed mapping.
  ScopedMemMap( QFile & file, qint64 offset, qint64 size ) :
    file( file ),
    address( file.map( offset, size ) )
  {
  }

  /// Releases the mapping, if one was established.
  ~ScopedMemMap()
  {
    if ( address )
      file.unmap( address );
  }

  /// @return Pointer to the first mapped byte, or null if mapping failed.
  inline uchar * startAddress() const
  {
    return address;
  }
};
class MdictParser
{
public:
enum
{
kParserVersion = 0x000000d
};
struct RecordIndex
{
qint64 startPos;
qint64 endPos;
qint64 shadowStartPos;
qint64 shadowEndPos;
qint64 compressedSize;
qint64 decompressedSize;
inline bool operator==( qint64 rhs ) const
{
return ( shadowStartPos <= rhs ) && ( rhs < shadowEndPos );
}
inline bool operator<( qint64 rhs ) const
{
return shadowEndPos <= rhs;
}
inline bool operator>( qint64 rhs ) const
{
return shadowStartPos > rhs;
}
static size_t bsearch( vector<RecordIndex> const & offsets, qint64 val );
};
struct RecordInfo
{
qint64 compressedBlockPos;
qint64 recordOffset;
qint64 decompressedBlockSize;
qint64 compressedBlockSize;
qint64 recordSize;
};
class RecordHandler
{
public:
virtual void handleRecord( QString const & name, RecordInfo const & recordInfo ) = 0;
};
typedef vector< pair<qint64, qint64> > BlockInfoVector;
typedef vector< pair<qint64, QString> > HeadWordIndex;
typedef map<qint32, pair<QString, QString> > StyleSheets;
inline QString const & title() const
{
return title_;
}
inline QString const & description() const
{
return description_;
}
inline StyleSheets const & styleSheets() const
{
return styleSheets_;
}
inline quint32 wordCount() const
{
return wordCount_;
}
inline QString const & encoding() const
{
return encoding_;
}
inline QString const & filename() const
{
return filename_;
}
inline bool isRightToLeft() const
{
return rtl_;
}
MdictParser();
~MdictParser() {}
bool open( const char * filename );
bool readNextHeadWordIndex( HeadWordIndex & headWordIndex );
bool readRecordBlock( HeadWordIndex & headWordIndex, RecordHandler & recordHandler );
// helpers
static QString toUtf16( const char * fromCode, const char * from, size_t fromSize );
static inline QString toUtf16( QString const & fromCode, const char * from, size_t fromSize )
{
return toUtf16( fromCode.toLatin1().constData(), from, fromSize );
}
static bool parseCompressedBlock( qint64 compressedBlockSize, const char * compressedBlockPtr,
qint64 decompressedBlockSize, QByteArray & decompressedBlock);
static QString & substituteStylesheet( QString & article, StyleSheets const & styleSheets );
static inline string substituteStylesheet( string const & article, StyleSheets const & styleSheets )
{
QString s = QString::fromUtf8( article.c_str() );
substituteStylesheet( s, styleSheets );
return string( s.toUtf8().constData() );
}
protected:
qint64 readNumber( QDataStream & in );
static quint32 readU8OrU16( QDataStream & in, bool isU16 );
static bool checkAdler32(const char * buffer, unsigned int len, quint32 checksum);
static bool decryptHeadWordIndex(char * buffer, qint64 len);
bool readHeader( QDataStream & in );
bool readHeadWordBlockInfos( QDataStream & in );
bool readRecordBlockInfos();
BlockInfoVector decodeHeadWordBlockInfo( QByteArray const & headWordBlockInfo );
HeadWordIndex splitHeadWordBlock( QByteArray const & block );
protected:
QString filename_;
QPointer<QFile> file_;
StyleSheets styleSheets_;
BlockInfoVector headWordBlockInfos_;
BlockInfoVector::iterator headWordBlockInfosIter_;
vector<RecordIndex> recordBlockInfos_;
QString encoding_;
QString title_;
QString description_;
double version_;
qint64 numHeadWordBlocks_;
qint64 headWordBlockInfoSize_;
qint64 headWordBlockSize_;
qint64 headWordBlockInfoPos_;
qint64 headWordPos_;
qint64 totalRecordsSize_;
qint64 recordPos_;
quint32 wordCount_;
int numberTypeSize_;
int encrypted_;
bool rtl_;
};
}
#endif // __MDICTPARSER_HH_INCLUDED__