Skip to content

Commit d9ae10f

Browse files
committed
feat: add a script for importing mime types
1 parent f53a549 commit d9ae10f

File tree

1 file changed

+138
-0
lines changed

1 file changed

+138
-0
lines changed

import-mime.py

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#!/usr/bin/env python
2+
3+
import csv
4+
import os
5+
import urllib.request
6+
import xml.etree.ElementTree as ET
7+
8+
# Base code of every known top-level media type.  The bases are consecutive
# 0x10000-wide ranges that start at 0x200000, in the order listed below
# (application -> 0x200000, audio -> 0x210000, ... video -> 0x280000).
sections = {
    name: 0x200000 + (index << 16)
    for index, name in enumerate((
        "application",
        "audio",
        "font",
        "image",
        "message",
        "model",
        "multipart",
        "text",
        "video",
    ))
}

# Prefix -> namespace URI mapping handed to the ElementTree path queries.
ns = dict(a="http://www.iana.org/assignments")

# URL of the media-types registry document that main() downloads.
source = "https://www.iana.org/assignments/media-types/media-types.xml"
22+
23+
24+
class Table(list):
    """A CSV file held in memory as a list of rows.

    Every row is a sequence of string cells.  The rows are read eagerly
    from *fname* when the table is constructed; :meth:`save` rewrites that
    same file with the columns padded so that they line up.
    """

    def __init__(self, fname='table.csv'):
        """Load all rows of the CSV file *fname*.

        Whitespace that directly follows a delimiter is discarded
        (``skipinitialspace=True``), so the alignment padding written by
        :meth:`save` is not read back as part of a cell.
        """
        super().__init__()
        self._fname = fname
        # The csv module requires newline='' so that it can do its own
        # newline handling.
        with open(fname, newline='') as table:
            self.extend(csv.reader(table, skipinitialspace=True))

    def save(self):
        """Write the rows back to the source file with aligned columns.

        Each cell except the first in a row is prefixed with enough spaces
        to fill the previous column up to its widest cell, plus one
        separating space.  The data is written to ``<fname>.tmp`` first and
        then moved over the original file, so an error while writing does
        not leave a truncated table behind.
        """
        # Widest cell per column index.  Every index that occurs is
        # recorded, including columns that only hold empty cells, because
        # the padding of column i looks up the width of column i - 1.
        widths = {}
        for row in self:
            for i, cell in enumerate(row):
                widths[i] = max(widths.get(i, 0), len(cell))

        def aligned(row):
            cells = []
            for i, cell in enumerate(row):
                if i == 0:
                    cells.append(cell)
                else:
                    gap = 1 + widths[i - 1] - len(row[i - 1])
                    cells.append(" " * gap + cell)
            return cells

        tmpfname = self._fname + ".tmp"
        # newline='' keeps the csv writer's line terminator from being
        # translated a second time by the text layer.
        with open(tmpfname, 'w', newline='') as table:
            csv.writer(table).writerows(aligned(row) for row in self)
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows when the target exists.
        os.replace(tmpfname, self._fname)
46+
47+
48+
def formatCode(code: int) -> str:
    """Format *code* as ``0x``-prefixed lowercase hex, zero-padded.

    The padding width is two hex digits for every 7-bit group needed to
    hold *code* (presumably mirroring a 7-bits-per-byte varint encoding --
    confirm against the table's conventions).  Zero counts as one group.
    Negative values get no padding at all.
    """
    if code > 0:
        # ceil(bit_length / 7) groups of seven bits
        groups = (code.bit_length() + 6) // 7
    elif code == 0:
        groups = 1
    else:
        groups = 0

    return "0x" + format(code, f"0{groups * 2}x")
59+
60+
61+
def _findMimeRange(table):
    """Return ``(start, end)`` such that ``table[start:end]`` holds the
    contiguous run of rows tagged ``mimetype``.

    Row 0 is treated as the header and is not inspected.  When the table
    has no mimetype rows, both indices equal ``len(table)`` so that new
    rows end up appended.

    Raises:
        RuntimeError: if a mimetype row appears after the run has ended.
        ValueError: if a non-header row does not have exactly four cells.
    """
    start = end = None
    for index, [_, tag, _, _] in enumerate(table[1:], start=1):
        if tag == "mimetype":
            if start is None:
                start = index
            elif end is not None:
                raise RuntimeError(
                    "did not expect an mimetype out of the mime range")
        elif start is not None and end is None:
            end = index

    if start is None:
        start = len(table)
    if end is None:
        end = len(table)
    return start, end


def _fetchIanaTypes():
    """Download the registry at ``source`` and return the text of every
    ``registry/record/file`` element, stripped, skipping empty ones.

    Raises:
        RuntimeError: if the document root does not have id 'media-types'.
    """
    with urllib.request.urlopen(source) as f:
        root = ET.parse(f).getroot()

    if root.get("id") != "media-types":
        raise RuntimeError("expected root node to have id 'media-types'")

    names = []
    for element in root.iterfind('./a:registry/a:record/a:file', ns):
        # An element without text cannot name a media type.
        name = (element.text or "").strip()
        if name:
            names.append(name)
    return names


def main():
    """Merge the media types listed in the downloaded registry into
    ``table.csv``.

    Existing ``mimetype`` rows are validated (known top-level type, code
    inside that type's section, codes consecutive within the section in
    table order).  Every registry entry that is not in the table yet gets
    the next free code of its section and an empty description.  The
    mimetype rows are then replaced by the merged set, ordered by code,
    and the table is saved.

    Raises:
        RuntimeError: on an inconsistent table, an unexpected registry
            document, a registry entry with an unknown or malformed type,
            or a section that has run out of codes.
    """
    table = Table("table.csv")
    mimeStart, mimeEnd = _findMimeRange(table)

    lastCode = sections.copy()
    assigned = {}
    for name, _, code, description in table[mimeStart:mimeEnd]:
        code = int(code, 16)
        assigned[name] = (code, description)

        parts = name.split('/')
        section = parts[0]
        if section not in sections:
            raise RuntimeError(f"unknown mime base type {name}")
        if len(parts) == 1:
            # A bare top-level type does not consume a subtype code.
            continue
        if len(parts) != 2:
            raise RuntimeError(f"invalid mimetype {name}")

        lastCode[section] += 1
        if code & 0xff0000 != sections[section]:
            raise RuntimeError("wrong section for type")
        if lastCode[section] != code:
            raise RuntimeError(
                f"expected code 0x{lastCode[section]:x}, got 0x{code:x}")

    for mimetype in _fetchIanaTypes():
        if mimetype in assigned:
            continue
        section, slash, _ = mimetype.partition('/')
        if not slash:
            raise RuntimeError(f"invalid mimetype {mimetype}")
        if section not in sections:
            # Report this like the table validation does, not as a KeyError.
            raise RuntimeError(f"unknown mime base type {mimetype}")
        code = lastCode[section] + 1
        if code & 0xff0000 != sections[section]:
            # The next code would spill into the neighbouring section.
            raise RuntimeError(f"no codes left in section {section}")
        lastCode[section] = code
        assigned[mimetype] = (code, "")

    # sorted() is stable, so entries with equal codes keep insertion order.
    items = sorted(assigned.items(), key=lambda item: item[1][0])
    table[mimeStart:mimeEnd] = [
        [name, "mimetype", formatCode(code), description]
        for name, (code, description) in items
    ]
    table.save()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)