Skip to content

Commit 1347496

Browse files
committed
Support equation and table head in markdown #1
1 parent d9eb275 commit 1347496

File tree

3 files changed

+34
-37
lines changed

3 files changed

+34
-37
lines changed

examples/insert_text.ipynb

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"notion = Client(auth=notion_api_key)\n",
4949
"parent = {\"type\": \"database_id\", \"database_id\": database_id}\n",
5050
"properties = {\"Title\": {\"title\": [{\"text\": {\"content\": \"Page with multi text\"}}]}}\n",
51-
"text_content = \"Some words\" * 200 + \"Any\"\n",
51+
"text_content = \"Some words\" \n",
5252
"text_block = {\n",
5353
" \"type\": \"text\",\n",
5454
" \"text\": {\n",
@@ -66,11 +66,18 @@
6666
" \"href\": None\n",
6767
"}\n",
6868
"\n",
69+
"equation = {\n",
70+
" \"type\": \"equation\",\n",
71+
" \"equation\": {\n",
72+
" \"expression\": \"e=mc^2\"\n",
73+
" }\n",
74+
"}\n",
75+
"\n",
6976
"children = [{\n",
7077
" \"object\": \"block\",\n",
7178
" \"type\": \"paragraph\",\n",
7279
" \"paragraph\": {\n",
73-
" \"rich_text\": [text_block]\n",
80+
" \"rich_text\": [text_block, equation]\n",
7481
" }\n",
7582
"}]\n",
7683
"\n",
@@ -84,35 +91,6 @@
8491
"created_page = cast(Dict[str, Any], created_page)\n",
8592
"print(f'page_id = {created_page[\"id\"]}')"
8693
]
87-
},
88-
{
89-
"cell_type": "code",
90-
"execution_count": null,
91-
"metadata": {},
92-
"outputs": [],
93-
"source": [
94-
"text_content = \"Some words\" * 200 + \"Any\"\n",
95-
"from html2notion.translate.html2json_base import Html2JsonBase\n",
96-
"\n",
97-
"text_obj = Html2JsonBase.generate_text(plain_text=text_content)\n",
98-
"children_2 = [{\n",
99-
" \"object\": \"block\",\n",
100-
" \"type\": \"paragraph\",\n",
101-
" \"paragraph\": {\n",
102-
" \"rich_text\": [text_obj]\n",
103-
" }\n",
104-
"}]\n",
105-
"\n",
106-
"created_page_2 = notion.pages.create(\n",
107-
" parent=parent,\n",
108-
" properties=properties,\n",
109-
" children=children_2\n",
110-
")\n",
111-
"\n",
112-
"from typing import Dict, Any, cast\n",
113-
"created_page_2 = cast(Dict[str, Any], created_page)\n",
114-
"print(f'page_id = {created_page_2[\"id\"]}')"
115-
]
11694
}
11795
],
11896
"metadata": {

html2notion/translate/html2json_base.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ def generate_inline_obj(self, tag: PageElement):
160160
return res_obj
161161

162162

163-
164163
def generate_link(self, **kwargs):
165164
plain_text = kwargs.get("plain_text", "")
166165
if not plain_text:
@@ -425,7 +424,7 @@ def _convert_one_list_item(self, soup, list_type):
425424
"""
426425
<div>
427426
<div><br /></div>
428-
<table> <tbody> <tr> <td> </td> </tr> </tbody>
427+
<table> <thead> </thead><tbody> <tr> <td> </td> </tr> </tbody> </table>
429428
<div><br /></div>
430429
</div>
431430
"""
@@ -438,10 +437,12 @@ def convert_table(self, soup):
438437
return
439438

440439
table_width = len(tr_tags[0].find_all('td'))
440+
has_header = False
441441
for tr in tr_tags:
442442
td_tags = tr.find_all('td')
443443
if not td_tags:
444-
continue
444+
td_tags = tr.find_all('th')
445+
has_header = True
445446
table_width = max(table_width, len(td_tags))
446447
one_row = {
447448
"type": "table_row",
@@ -457,7 +458,7 @@ def convert_table(self, soup):
457458
table_obj = {
458459
"table": {
459460
"has_row_header": False,
460-
"has_column_header": False,
461+
"has_column_header": has_header,
461462
"table_width": table_width,
462463
"children": table_rows,
463464
}

html2notion/translate/html2json_markdown.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def convert_code(self, soup):
130130

131131
css_dict = Html2JsonBase.get_tag_style(code_tag)
132132
language = css_dict.get('language', 'plain text')
133-
json_obj["code"]["language"] = language
133+
json_obj["code"]["language"] = Html2JsonBase.get_valid_language(language)
134134
json_obj["code"]["rich_text"] = self.merge_rich_text(rich_text)
135135
return json_obj
136136

@@ -148,6 +148,24 @@ def convert_quote(self, soup):
148148
rich_text.extend(text_obj)
149149
return json_obj
150150

151+
def convert_equation(self, soup: Tag):
152+
json_obj = {
153+
"object": "block",
154+
"type": "paragraph",
155+
"paragraph": {
156+
"rich_text": []
157+
}
158+
}
159+
expression = soup.get_text()
160+
equation = json_obj["paragraph"]["rich_text"]
161+
equation.append({
162+
"type": "equation",
163+
"equation": {
164+
"expression": expression
165+
}
166+
})
167+
return json_obj
168+
151169
def convert_to_do(self, soup: Tag):
152170
li_tags = soup.find_all('li', recursive=True)
153171
childs = li_tags if li_tags else [soup]
@@ -183,7 +201,7 @@ def _is_checkbox(self, soup):
183201
def _extract_code_blocks(self):
184202
code_pattern = re.compile(r'```(\w+)?\n(.*?)```', re.DOTALL)
185203
matches = code_pattern.findall(self.markdown)
186-
code_blocks = [{'language': Html2JsonBase.get_valid_language(match[0]), 'code': match[1]} for match in matches]
204+
code_blocks = [{'language': match[0], 'code': match[1].rstrip('\n')} for match in matches]
187205
return code_blocks
188206

189207
def _replace_pre_code(self, soup):

0 commit comments

Comments
 (0)