Skip to content

Commit 053eb84

Browse files
samrushing authored and ianare committed
initial HEIC support. (#89)
1 parent 1de2ee0 commit 053eb84

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed

exifread/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .classes import *
77
from .tags import *
88
from .utils import ord_
9+
from .heic import HEICExifFinder
910

1011
__version__ = '2.1.2'
1112

@@ -39,6 +40,10 @@ def process_file(f, stop_tag=DEFAULT_STOP_TAG, details=True, strict=False, debug
3940
endian = f.read(1)
4041
f.read(1)
4142
offset = 0
43+
elif data[4:12] == b'ftypheic':
44+
f.seek(0)
45+
heic = HEICExifFinder (f)
46+
offset, endian = heic.find_exif()
4247
elif data[0:2] == b'\xFF\xD8':
4348
# it's a JPEG file
4449
logger.debug("JPEG format recognized data[0:2]=0x%X%X", ord_(data[0]), ord_(data[1]))

exifread/heic.py

+244
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
# -*- Mode: Python -*-
2+
3+
# Find Exif data in an HEIC file.
4+
5+
# As of 2019, the latest standard seems to be "ISO/IEC 14496-12:2015"
6+
# There are many different related standards. (quicktime, mov, mp4, etc...)
7+
# See https://en.wikipedia.org/wiki/ISO_base_media_file_format for more details.
8+
9+
# We parse just enough of the iso format to locate the Exif data in the file.
10+
# Inside the 'meta' box are two directories we need:
11+
# 1) the 'iinf' box contains 'infe' records, we look for the item_ID for 'Exif'.
12+
# 2) once we have the item_ID, we find a matching entry in the 'iloc' box, which
13+
# gives us position and size information.
14+
15+
import struct
16+
17+
from .exif_log import get_logger
18+
19+
logger = get_logger()
20+
21+
class WrongBox(Exception):
    """Raised when the next box in the file is not the expected kind.

    Raised with two arguments: the expected box name and the name found.
    """
23+
class NoParser(Exception):
    """Raised when a box must be parsed but no parser exists for its name.

    Raised with a single argument: the name of the box.
    """
25+
class BoxVersion(Exception):
    """Raised when a box declares a version the parser does not support.

    Raised with two arguments: the highest supported version and the
    version found in the file.
    """
27+
class BadSize(Exception):
    """Raised when a variable-width integer field has an unsupported size.

    Raised with a single argument: the offending size in bytes.
    """
29+
30+
class Box:
    """A single box found while walking the file.

    Only ``name`` is set on construction; the parsing code attaches
    further attributes to the instance as it reads the box.
    """

    def __init__(self, name):
        # Box type as a text string, e.g. 'ftyp' or 'meta'.
        self.name = name

    def __repr__(self):
        # Wrap the name in a 1-tuple so %-formatting always treats it as
        # a single value.
        fields = (self.name,)
        return "<box '%s'>" % fields
36+
37+
class HEICExifFinder:
    """Locate the Exif payload inside an HEIC file.

    Only as much of the box structure is parsed as is needed: the 'ftyp'
    box, then the 'meta' box with its 'iinf' (item info) and 'iloc' (item
    location) children.  Parsers are looked up by name: a box called
    ``xxxx`` is handled by a method called ``parse_xxxx`` if one exists.

    :param file: binary file object positioned at the start of the HEIC
        data; ``read``, ``seek`` and ``tell`` are used on it.
    """

    def __init__(self, file):
        self.file = file

    # -- low-level readers ------------------------------------------------

    def get(self, nbytes):
        """Read exactly ``nbytes`` bytes from the file.

        :raises EOFError: if fewer than ``nbytes`` bytes are available.
        """
        data = self.file.read(nbytes)
        # A short read means the file is truncated.  Report it here rather
        # than letting struct.unpack fail later with an unrelated error.
        # Comparing lengths also lets a zero-byte request succeed.
        if len(data) != nbytes:
            raise EOFError
        return data

    def get16(self):
        """Read a big-endian unsigned 16-bit integer."""
        return struct.unpack('>H', self.get(2))[0]

    def get32(self):
        """Read a big-endian unsigned 32-bit integer."""
        return struct.unpack('>L', self.get(4))[0]

    def get64(self):
        """Read a big-endian unsigned 64-bit integer."""
        return struct.unpack('>Q', self.get(8))[0]

    def get_int4x2(self):
        """Read one byte and return its (high nibble, low nibble)."""
        byte = struct.unpack('>B', self.get(1))[0]
        return byte >> 4, byte & 0xf

    def get_int(self, size):
        """Read an unsigned integer stored in ``size`` bytes.

        Some fields have variant-sized data.  A size of 0 means the field
        is absent; nothing is read and 0 is returned.

        :raises BadSize: if ``size`` is not one of 0, 2, 4 or 8.
        """
        if size == 0:
            return 0
        if size == 2:
            return self.get16()
        if size == 4:
            return self.get32()
        if size == 8:
            return self.get64()
        raise BadSize(size)

    def get_string(self):
        """Read a NUL-terminated byte string; the terminator is consumed.

        :returns: the bytes before the terminator.
        :raises EOFError: if the file ends before a NUL byte is found.
        """
        chars = []
        while True:
            ch = self.get(1)
            if ch == b'\x00':
                break
            chars.append(ch)
        return b''.join(chars)

    # -- box walking ------------------------------------------------------

    def next_box(self, depth=0):
        """Read the box header at the current position.

        The returned :class:`Box` carries ``size`` (number of payload
        bytes), ``pos`` (file offset where the payload starts) and
        ``after`` (file offset of the first byte past the box).

        :param depth: unused by this method.
        :raises NotImplementedError: if the box declares size 0.
        :raises BadSize: if the declared size is smaller than the header.
        """
        start = self.file.tell()
        size = self.get32()
        kind = self.get(4).decode('ascii')
        box = Box(kind)
        header_len = 8
        if size == 0:
            # signifies 'to the end of the file', we shouldn't see this.
            raise NotImplementedError
        if size == 1:
            # 64-bit size follows type.
            size = self.get64()
            header_len = 16
        if size < header_len:
            # A box cannot be smaller than its own header.  Accepting such
            # a size would put `after` at or before the current position,
            # and the loop in parse_meta could then never advance.
            raise BadSize(size)
        box.size = size - header_len
        box.after = start + size
        box.pos = self.file.tell()
        return box

    def get_full(self, box):
        """Read the version/flags word of a 'full' box into ``box``.

        iso boxes come in 'old' and 'full' variants; the 'full' variant
        starts with a 32-bit word holding an 8-bit version followed by
        24 bits of flags.
        """
        vflags = self.get32()
        box.version = vflags >> 24
        box.flags = vflags & 0x00ffffff

    def skip(self, box):
        """Seek to the first byte after ``box``."""
        self.file.seek(box.after)

    def expect_parse(self, name):
        """Read the next box, require it to be called ``name``, parse it.

        :returns: the parsed box.
        :raises WrongBox: if the next box has a different name.
        """
        box = self.next_box()
        if box.name != name:
            raise WrongBox(name, box.name)
        return self.parse_box(box)

    def get_parser(self, box):
        """Return the bound ``parse_<name>`` method for ``box``, or None."""
        return getattr(self, 'parse_%s' % (box.name,), None)

    def parse_box(self, b):
        """Parse ``b`` with its matching parser and seek past it.

        :returns: ``b``, with the attributes set by its parser.
        :raises NoParser: if there is no parser for this kind of box.
        """
        parser = self.get_parser(b)
        if parser is None:
            raise NoParser(b.name)
        parser(b)
        # in case anything is left unread
        self.file.seek(b.after)
        return b

    # -- individual box parsers -------------------------------------------

    def parse_ftyp(self, box):
        """Parse 'ftyp': major brand, minor version, compatible brands."""
        box.major_brand = self.get(4)
        box.minor_version = self.get32()
        # Whatever remains of the payload is a list of 4-byte brands.
        box.compat = [self.get(4) for _ in range(0, box.size - 8, 4)]

    def parse_meta(self, meta):
        """Parse 'meta' and every child box we have a parser for.

        Parsed children are stored in ``meta.subs`` keyed by box name;
        children without a parser are skipped.
        """
        self.get_full(meta)
        # this is full of boxes, but not in a predictable order.
        meta.subs = {}
        while self.file.tell() < meta.after:
            box = self.next_box()
            parser = self.get_parser(box)
            if parser is None:
                logger.debug("HEIC: skipping %r", box)
            else:
                parser(box)
                meta.subs[box.name] = box
            # skip any unparsed data
            self.skip(box)

    def parse_infe(self, box):
        """Parse an 'infe' entry far enough to learn its ID and type.

        For versions 2 and 3 this sets ``item_ID``,
        ``item_protection_index``, ``item_type`` and ``item_name``.
        Versions below 2 are not decoded; ``item_type`` is set to an
        empty byte string so it never matches b'Exif'.
        """
        self.get_full(box)
        if box.version < 2:
            # bytes, not str: this value is compared against b'Exif'.
            box.item_type = b''
            return
        if box.version == 2:
            box.item_ID = self.get16()
        elif box.version == 3:
            box.item_ID = self.get32()
        box.item_protection_index = self.get16()
        box.item_type = self.get(4)
        box.item_name = self.get_string()
        # ignore the rest

    def parse_iinf(self, box):
        """Parse 'iinf', stopping at the first 'infe' entry of type Exif.

        Sets ``box.exif_infe`` to that entry, or to None if there is none.
        """
        self.get_full(box)
        # entry_count is 16 bits wide in version 0 and 32 bits afterwards.
        count = self.get16() if box.version == 0 else self.get32()
        box.exif_infe = None
        for _ in range(count):
            infe = self.expect_parse('infe')
            if infe.item_type == b'Exif':
                logger.debug("HEIC: found Exif 'infe' box")
                box.exif_infe = infe
                break

    def parse_iloc(self, box):
        """Parse 'iloc' into ``box.locs``.

        ``box.locs`` maps each item ID to a list of ``(offset, length)``
        extents.  Each offset already includes the item's base offset.

        :raises BoxVersion: if the box version is greater than 2.
        """
        self.get_full(box)
        box.offset_size, box.length_size = self.get_int4x2()
        box.base_offset_size, box.index_size = self.get_int4x2()
        # item_count and item_ID share a width that depends on the version.
        if box.version < 2:
            read_id = self.get16
        elif box.version == 2:
            read_id = self.get32
        else:
            raise BoxVersion(2, box.version)
        box.item_count = read_id()
        extended = box.version in (1, 2)
        box.locs = {}
        logger.debug("HEIC: %d iloc items", box.item_count)
        for _item in range(box.item_count):
            item_ID = read_id()
            if extended:
                # ignore construction_method
                self.get16()
            # data_reference_index: read to stay aligned, value unused.
            self.get16()
            box.base_offset = self.get_int(box.base_offset_size)
            extent_count = self.get16()
            extents = []
            for _extent in range(extent_count):
                if extended and box.index_size > 0:
                    # extent_index: read to stay aligned, value unused.
                    self.get_int(box.index_size)
                extent_offset = self.get_int(box.offset_size)
                extent_length = self.get_int(box.length_size)
                # Extent offsets are relative to the item's base offset.
                extents.append(
                    (box.base_offset + extent_offset, extent_length))
            box.locs[item_ID] = extents

    # -- entry point ------------------------------------------------------

    def find_exif(self):
        """Find the Exif data in the file.

        Expects an 'ftyp' box immediately followed by a 'meta' box.

        :returns: ``(offset, endian)``: the file offset just past the
            Exif header, and the single byte read at that offset.
        """
        ftyp = self.expect_parse('ftyp')
        assert ftyp.major_brand == b'heic'
        assert ftyp.minor_version == 0
        meta = self.expect_parse('meta')
        item_ID = meta.subs['iinf'].exif_infe.item_ID
        extents = meta.subs['iloc'].locs[item_ID]
        logger.debug("HEIC: found Exif location.")
        # we expect the Exif data to be in one piece.
        assert len(extents) == 1
        pos, _size = extents[0]
        # looks like there's a kind of pseudo-box here: a 32-bit length
        # followed by that many bytes starting with 'Exif'.
        self.file.seek(pos)
        header_len = self.get32()
        # Read outside the assert: assert statements are removed under
        # `python -O`, and the file position must advance regardless.
        header = self.get(header_len)
        assert header[0:4] == b'Exif'
        offset = self.file.tell()
        endian = self.file.read(1)
        return offset, endian

0 commit comments

Comments
 (0)