-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkdown.py
34 lines (26 loc) · 926 Bytes
/
markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from bs4 import BeautifulSoup
import base64
import os
# Parse HTML
with open('/home/akugyo/GitHub/bills/2024/file.html', 'r') as f:
soup = BeautifulSoup(f, 'html.parser')
# Find image tags
images = soup.find_all('img')
# Update image tags with Base64 encoded images
for img in images:
img_url = img.get('src')
# Remove leading slash or file protocol
# if img_url.startswith('/'):
# img_url = img_url[1:]
# if img_url.startswith('file://'):
# img_url = img_url[7:]
# Read image file
with open(img_url, 'rb') as image_file:
image_data = image_file.read()
# Base64 encode image
encoded_image = base64.b64encode(image_data).decode('utf-8')
# Update image tag with Base64 encoded image
img['src'] = f'data:image/{img_url.split(".")[-1]};base64,{encoded_image}'
# Save updated HTML
with open('13.html', 'w') as f:
f.write(str(soup))