Skip to content

Commit

Permalink
改行問題に対応するためJSDOMから変更 (#147)
Browse files Browse the repository at this point in the history
  • Loading branch information
ttizze authored Aug 3, 2024
2 parents 2d46801 + 2b873df commit 1736ee2
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 17 deletions.
58 changes: 42 additions & 16 deletions web/app/feature/translate/utils/extractNumberedElements.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,57 @@
import { JSDOM } from "jsdom";
import { Parser } from "htmlparser2";

/**
 * Collects the text of every element in `content` that carries a
 * `data-number` attribute, plus the title as entry number 0.
 *
 * The HTML is streamed through htmlparser2's `Parser` rather than built into
 * a DOM so that `<br>` elements can be turned into newline characters in the
 * extracted text.
 *
 * Text handling per numbered element:
 * - text of all descendants is included (inline tags such as `<b>` or `<a>`
 *   do not end the element);
 * - runs of whitespace inside a line are collapsed to a single space and each
 *   line is trimmed;
 * - every `<br>` inside the element becomes a `\n`;
 * - elements whose resulting text is empty are skipped;
 * - elements whose `data-number` is empty or not an integer are skipped.
 *
 * @param content HTML fragment to scan.
 * @param title Text stored as the entry with number 0.
 * @returns The title entry followed by the numbered entries, sorted by
 *   ascending number.
 */
export function extractNumberedElements(
  content: string,
  title: string,
): Array<{ number: number; text: string }> {
  const numberedElements: Array<{ number: number; text: string }> = [
    { number: 0, text: title },
  ];

  // One frame per numbered element that is currently open, innermost last.
  // `depth` remembers the nesting level at which the element was opened so the
  // matching close tag can be recognised even when child tags close first.
  const openFrames: Array<{
    number: number;
    depth: number;
    finishedLines: string[];
    currentLine: string;
  }> = [];
  let depth = 0;

  const normalizeLine = (line: string): string =>
    line.replace(/\s+/g, " ").trim();

  const parser = new Parser(
    {
      onopentag(_name: string, attributes: { [x: string]: string }) {
        depth += 1;
        const rawNumber = attributes["data-number"];
        if (!rawNumber) {
          return;
        }
        const parsedNumber = Number.parseInt(rawNumber, 10);
        // A NaN number would make the final sort comparator inconsistent.
        if (Number.isNaN(parsedNumber)) {
          return;
        }
        openFrames.push({
          number: parsedNumber,
          depth,
          finishedLines: [],
          currentLine: "",
        });
      },
      ontext(text: string) {
        // Raw text is appended untrimmed so that the space between adjacent
        // inline nodes ("Hello " + "world") survives; whitespace is
        // normalised once per line when the element closes.
        for (const frame of openFrames) {
          frame.currentLine += text;
        }
      },
      onclosetag(name: string) {
        const innermost = openFrames[openFrames.length - 1];
        if (innermost !== undefined && innermost.depth === depth) {
          // This close tag belongs to the numbered element itself.
          openFrames.pop();
          const processedText = [
            ...innermost.finishedLines,
            innermost.currentLine,
          ]
            .map(normalizeLine)
            .join("\n");
          if (processedText) {
            numberedElements.push({
              number: innermost.number,
              text: processedText,
            });
          }
        } else if (name === "br") {
          // The line break is recorded on the close event for `br`, exactly
          // where the previous implementation inserted its marker.
          for (const frame of openFrames) {
            frame.finishedLines.push(frame.currentLine);
            frame.currentLine = "";
          }
        }
        depth -= 1;
      },
    },
    { decodeEntities: true },
  );

  parser.write(content);
  parser.end();

  return numberedElements.sort((a, b) => a.number - b.number);
}
3 changes: 2 additions & 1 deletion web/app/feature/translate/utils/generateGeminiMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export function generateSystemMessage(
- Ensure that each "text" field contains at least one character.
- Maintain the original array structure and order, with the title translation added as the first item.
- Output ONLY the translated JSON array. No additional text or explanations.
- Preserve and output newline characters (\n) as they are. It is important to maintain line breaks within the text.
Input text:
${source_text}
Expand All @@ -54,7 +55,7 @@ export function generateSystemMessage(
},
{
"number": 2,
"text": "Translated text for item 2"
"text": "Translated text \n for item 2"
},
...
]`;
Expand Down

0 comments on commit 1736ee2

Please sign in to comment.