Skip to content

Commit

Permalink
jsdomに戻し、brのみを改行とすることに変更 (#149)
Browse files Browse the repository at this point in the history
  • Loading branch information
ttizze authored Aug 3, 2024
2 parents 1736ee2 + 59266eb commit f8f7d6b
Showing 1 changed file with 17 additions and 48 deletions.
65 changes: 17 additions & 48 deletions web/app/feature/translate/utils/extractNumberedElements.ts
Original file line number Diff line number Diff line change
@@ -1,57 +1,26 @@
import { Parser } from "htmlparser2";
import { JSDOM } from "jsdom";

/**
 * Extracts `{ number, text }` pairs for elements that carry a `data-number`
 * attribute in an HTML string and returns them sorted by ascending number.
 *
 * NOTE(review): this span looks like a commit diff flattened without +/-
 * markers, so statements from two implementations are interleaved — one
 * driven by an htmlparser2 `Parser`, one driven by `JSDOM`. As written,
 * `numberedElements` is declared twice and both extraction passes push into
 * it. Confirm against the repository which lines actually remain.
 *
 * @param content - HTML markup to scan; it is handed to both `new JSDOM(...)`
 *   and `parser.write(...)`.
 * @param title - Referenced only by the first `numberedElements` initializer,
 *   which stores it as the entry with number 0.
 * @returns The collected entries, sorted ascending by `number`.
 */
export function extractNumberedElements(
content: string,
title: string,
): Array<{ number: number; text: string }> {
// First initializer: seeds the result with the title as entry 0.
const numberedElements: Array<{ number: number; text: string }> = [
{ number: 0, text: title },
];
// Parse the markup into a DOM document.
const doc = new JSDOM(content);
// NOTE(review): second declaration of the same name (see the header note);
// this one starts empty, so `title` is not included in the result.
const numberedElements: Array<{ number: number; text: string }> = [];
// Treat only <br> as a line break: drop every literal newline from the body
// markup, then replace each <br> tag (any case, optional "/") with "\n".
doc.window.document.body.innerHTML = doc.window.document.body.innerHTML
.replace(/\n/g, "")
.replace(/<br\s*\/?>/gi, "\n");

// State for the Parser callbacks below: the number of the element being
// captured, its buffered text fragments, and whether capture is active.
let currentNumber: number | null = null;
let currentText: string[] = [];
let inNumberedElement = false;
// Every element in the document that has a data-number attribute.
const elements = doc.window.document.querySelectorAll("[data-number]");

// Callback-driven pass over the raw markup.
const parser = new Parser(
{
// An opening tag with a non-empty data-number attribute starts (or restarts)
// capture and clears the text buffer.
onopentag(name: string, attributes: { [x: string]: string }) {
if (attributes["data-number"]) {
currentNumber = Number.parseInt(attributes["data-number"], 10);
inNumberedElement = true;
currentText = [];
}
},
// While capturing, each text fragment is trimmed on its own before buffering.
ontext(text) {
if (inNumberedElement) {
currentText.push(text.trim());
}
},
onclosetag(name) {
// A <br> inside a captured element is buffered as a placeholder so that it
// survives the whitespace collapsing below.
if (name === "br" && inNumberedElement) {
currentText.push("::BR::");
// Any other closing tag ends capture — not only the tag that opened it.
} else if (currentNumber !== null && inNumberedElement) {
// Join the fragments, collapse whitespace runs to a single space, trim,
// then turn the placeholders back into newlines.
const processedText = currentText
.join("")
.replace(/\s+/g, " ")
.trim()
.replace(/::BR::/g, "\n");

// Elements whose processed text is empty are not recorded.
if (processedText) {
numberedElements.push({
number: currentNumber,
text: processedText,
});
}
currentNumber = null;
inNumberedElement = false;
}
},
},
{ decodeEntities: true },
);

// Feed the whole markup to the parser and flush it.
parser.write(content);
parser.end();
// DOM pass: record each matched element's number and trimmed text content;
// a missing or empty text content is stored as "".
for (const element of elements) {
const dataNumber = element.getAttribute("data-number");
if (dataNumber !== null) {
numberedElements.push({
// NOTE(review): a non-numeric attribute value parses to NaN and nothing in
// this block filters it out before sorting.
number: Number.parseInt(dataNumber, 10),
text: element.textContent?.trim() || "",
});
}
}

// Sorts the array in place by ascending number and returns that same array.
return numberedElements.sort((a, b) => a.number - b.number);
}

0 comments on commit f8f7d6b

Please sign in to comment.