diff --git a/js/export/export.js b/js/export/export.js
index 2279c58..20268aa 100644
--- a/js/export/export.js
+++ b/js/export/export.js
@@ -9,6 +9,7 @@ import { writePdf } from './pdf/writePdf.js';
 import { writeHocr } from './writeHocr.js';
 import { writeText } from './writeText.js';
 import { writeHtml } from './writeHtml.js';
+import { writeAlto } from './writeAlto.js';
 import { removeCircularRefsOcr } from '../objects/ocrObjects.js';
 import { removeCircularRefsDataTables } from '../objects/layoutObjects.js';
 import { FontCont } from '../containers/fontContainer.js';
@@ -16,7 +17,7 @@ import { FontCont } from '../containers/fontContainer.js';
 /**
  * Export active OCR data to specified format.
  * @public
- * @param {'pdf'|'hocr'|'docx'|'html'|'xlsx'|'txt'|'text'|'scribe'} [format='txt']
+ * @param {'pdf'|'hocr'|'alto'|'docx'|'html'|'xlsx'|'txt'|'text'|'scribe'} [format='txt']
  * @param {number} [minPage=0] - First page to export.
  * @param {number} [maxPage=-1] - Last page to export (inclusive). -1 exports through the last page.
  * @returns {Promise<string|ArrayBuffer>}
@@ -218,6 +219,8 @@ export async function exportData(format = 'txt', minPage = 0, maxPage = -1) {
     }
   } else if (format === 'hocr') {
     content = writeHocr({ ocrData: ocrDownload, minValue: minPage, maxValue: maxPage });
+  } else if (format === 'alto') {
+    content = writeAlto({ ocrData: ocrDownload, minValue: minPage, maxValue: maxPage });
   } else if (format === 'html') {
     const images = /** @type {Array<ImageWrapper>} */ ([]);
     if (opt.includeImages) {
@@ -291,14 +294,15 @@ export async function exportData(format = 'txt', minPage = 0, maxPage = -1) {
 /**
  * Runs `exportData` and saves the result as a download (browser) or local file (Node.js).
  * @public
- * @param {'pdf'|'hocr'|'docx'|'xlsx'|'txt'|'text'|'html'|'scribe'} format
+ * @param {'pdf'|'hocr'|'alto'|'docx'|'xlsx'|'txt'|'text'|'html'|'scribe'} format
  * @param {string} fileName
  * @param {number} [minPage=0] - First page to export.
  * @param {number} [maxPage=-1] - Last page to export (inclusive). -1 exports through the last page.
  */
 export async function download(format, fileName, minPage = 0, maxPage = -1) {
   if (format === 'text') format = 'txt';
-  fileName = fileName.replace(/\.\w{1,6}$/, `.${format}`);
+  const ext = format === 'alto' ? 'xml' : format; // ALTO output is saved with an `.xml` extension; every other format uses its own name.
+  fileName = fileName.replace(/\.\w{1,6}$/, `.${ext}`); // Only an existing trailing extension of 1-6 word characters is replaced.
   const content = await exportData(format, minPage, maxPage);
   await saveAs(content, fileName);
 }
diff --git a/js/export/writeAlto.js b/js/export/writeAlto.js
new file mode 100644
index 0000000..4533922
--- /dev/null
+++ b/js/export/writeAlto.js
@@ -0,0 +1,313 @@
+import { opt } from '../containers/app.js';
+import { pageMetricsAll } from '../containers/dataContainer.js';
+import ocr from '../objects/ocrObjects.js';
+
+/**
+ * Maps a Tesseract language code (e.g. `eng`) to the `language-REGION` tag (e.g. `en-US`) written to ALTO XML; despite the name, the result is not an ISO 639-2 code.
+ * @param {string} tesseractLang - Tesseract language code. Codes without a mapping are returned unchanged.
+ */
+function tesseractToISO6392(tesseractLang) {
+  const langMap = {
+    eng: 'en-US',
+    fra: 'fr-FR',
+    deu: 'de-DE',
+    spa: 'es-ES',
+    ita: 'it-IT',
+    por: 'pt-PT',
+    nld: 'nl-NL',
+    rus: 'ru-RU',
+    pol: 'pl-PL',
+    ces: 'cs-CZ',
+    slk: 'sk-SK',
+    ukr: 'uk-UA',
+    hun: 'hu-HU',
+    ron: 'ro-RO',
+    hrv: 'hr-HR',
+    srp: 'sr-RS',
+    bul: 'bg-BG',
+    slv: 'sl-SI',
+    cat: 'ca-ES',
+    dan: 'da-DK',
+    fin: 'fi-FI',
+    nor: 'no-NO',
+    swe: 'sv-SE',
+    tur: 'tr-TR',
+    ell: 'el-GR',
+    ara: 'ar-SA',
+    heb: 'he-IL',
+    hin: 'hi-IN',
+    jpn: 'ja-JP',
+    kor: 'ko-KR',
+    chi_sim: 'zh-CN',
+    chi_tra: 'zh-TW',
+    tha: 'th-TH',
+    vie: 'vi-VN',
+  };
+  return Object.hasOwn(langMap, tesseractLang) ? langMap[tesseractLang] : tesseractLang; // Own keys only, so inherited names such as `constructor` pass through unchanged.
+}
+
+/**
+ * Serializes OCR data to a single ALTO XML (v2.0) document: <Description>, optional <Styles>, then one <Page> per exported page.
+ * @param {Object} params
+ * @param {Array<OcrPage>} params.ocrData - OCR data to export
+ * @param {number} [params.minValue] - First page to export (inclusive). Defaults to 0.
+ * @param {number} [params.maxValue] - Last page to export (inclusive). Omitted or negative exports through the last page.
+ * @returns {string} ALTO XML formatted string
+ */
+export function writeAlto({ ocrData, minValue, maxValue }) {
+  if (minValue === null || minValue === undefined) minValue = 0;
+  if (maxValue === null || maxValue === undefined || maxValue < 0) maxValue = ocrData.length - 1;
+  // Distinct (font family, font size) pairs, keyed by `family_size`; each one becomes a <TextStyle> entry.
+  const stylesMap = new Map();
+  let styleIdCounter = 0;
+
+  /**
+   * Get or create a style ID for a given font family and size
+   * @param {string} fontFamily - Falls back to 'Default' when empty or missing.
+   * @param {number} fontSize - Falls back to 10 when 0 or missing.
+   * @returns {string}
+   */
+  function getStyleId(fontFamily, fontSize) {
+    const key = `${fontFamily || 'Default'}_${fontSize || 10}`;
+    if (!stylesMap.has(key)) {
+      const styleId = `font${styleIdCounter++}`;
+      stylesMap.set(key, { id: styleId, fontFamily: fontFamily || 'Default', fontSize: fontSize || 10 });
+    }
+    return stylesMap.get(key).id;
+  }
+  // First pass: register every style used on the exported pages, because <Styles> is written before <Layout>.
+  for (let i = minValue; i <= maxValue; i++) {
+    const pageObj = ocrData[i];
+    if (!pageObj) continue;
+
+    for (const lineObj of pageObj.lines) {
+      for (const wordObj of lineObj.words) {
+        if (wordObj.style.font || wordObj.style.size) {
+          getStyleId(wordObj.style.font, wordObj.style.size);
+        }
+      }
+    }
+  }
+
+  let altoOut = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
+  altoOut += '<alto xmlns="http://www.loc.gov/standards/alto/ns-v2#" ';
+  altoOut += 'xmlns:xlink="http://www.w3.org/1999/xlink" ';
+  altoOut += 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ';
+  altoOut += 'xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd">\n';
+
+  altoOut += '<Description>\n';
+  altoOut += '<MeasurementUnit>pixel</MeasurementUnit>\n';
+  altoOut += '<OCRProcessing ID="IdOcr"><ocrProcessingStep>';
+  const today = new Date().toISOString().split('T')[0];
+  altoOut += `<processingDateTime>${today}</processingDateTime>`;
+  altoOut += '<processingSoftware>';
+  altoOut += '<softwareCreator>scribeocr</softwareCreator>';
+  altoOut += '<softwareName>scribe.js</softwareName>';
+  altoOut += '</processingSoftware>';
+  altoOut += '</ocrProcessingStep></OCRProcessing>\n';
+  altoOut += '</Description>\n';
+
+  if (stylesMap.size > 0) {
+    altoOut += '<Styles>';
+    for (const [, style] of stylesMap) {
+      altoOut += `<TextStyle ID="${style.id}" FONTFAMILY="${ocr.escapeXml(style.fontFamily)}" FONTSIZE="${style.fontSize}"/>`;
+    }
+    altoOut += '\n</Styles>\n';
+  }
+
+  altoOut += '<Layout>\n';
+
+  for (let pageIndex = minValue; pageIndex <= maxValue; pageIndex++) {
+    const pageObj = ocrData[pageIndex];
+    // Use the OCR page's dimensions, falling back to the stored page metrics; if both are missing the size stays 0.
+    let pageHeight = 0;
+    let pageWidth = 0;
+    if (pageObj) {
+      pageHeight = pageObj.dims.height;
+      pageWidth = pageObj.dims.width;
+    } else if (pageMetricsAll[pageIndex]) {
+      pageHeight = pageMetricsAll[pageIndex].dims.height;
+      pageWidth = pageMetricsAll[pageIndex].dims.width;
+    }
+
+    altoOut += `<Page ID="Page${pageIndex + 1}" PHYSICAL_IMG_NR="${pageIndex + 1}" HEIGHT="${pageHeight}" WIDTH="${pageWidth}">\n`;
+    // Pages without any lines are written as an empty <Page> element.
+    if (!pageObj || pageObj.lines.length === 0) {
+      altoOut += '</Page>\n';
+      continue;
+    }
+
+    altoOut += `<PrintSpace HEIGHT="${pageHeight}" WIDTH="${pageWidth}" VPOS="0" HPOS="0">\n`;
+
+    let parCurrent = null;
+    let blockIndex = 0;
+    let blockStyleRef = null;
+    let blockLang = null;
+
+    for (let lineIndex = 0; lineIndex < pageObj.lines.length; lineIndex++) {
+      const lineObj = pageObj.lines[lineIndex];
+
+      if (lineObj.words.length === 0) continue;
+      // Open a new <TextBlock> for the first non-empty line and whenever the paragraph changes.
+      if (blockIndex === 0 || lineObj.par !== parCurrent) {
+        if (blockIndex > 0) {
+          altoOut += '</TextBlock>\n';
+        }
+
+        parCurrent = lineObj.par;
+
+        let blockLeft = Math.round(lineObj.bbox.left);
+        let blockTop = Math.round(lineObj.bbox.top);
+        let blockRight = Math.round(lineObj.bbox.right);
+        let blockBottom = Math.round(lineObj.bbox.bottom);
+        // Scan ahead over this paragraph's lines to compute the block bbox and tally word styles and languages.
+        const blockStyleCounts = new Map();
+        const blockLangCounts = new Map();
+        for (let j = lineIndex; j < pageObj.lines.length; j++) {
+          const nextLine = pageObj.lines[j];
+          if (nextLine.words.length === 0) continue;
+          if (j > lineIndex && nextLine.par !== parCurrent) break;
+
+          if (j > lineIndex) {
+            blockLeft = Math.min(blockLeft, Math.round(nextLine.bbox.left));
+            blockTop = Math.min(blockTop, Math.round(nextLine.bbox.top));
+            blockRight = Math.max(blockRight, Math.round(nextLine.bbox.right));
+            blockBottom = Math.max(blockBottom, Math.round(nextLine.bbox.bottom));
+          }
+
+          for (const word of nextLine.words) {
+            if (word.style.font || word.style.size) {
+              const styleId = getStyleId(word.style.font || '', word.style.size || 0);
+              blockStyleCounts.set(styleId, (blockStyleCounts.get(styleId) || 0) + 1);
+            }
+            if (word.lang) {
+              blockLangCounts.set(word.lang, (blockLangCounts.get(word.lang) || 0) + 1);
+            }
+          }
+        }
+        // The most frequent style and language become the block-level attributes.
+        blockStyleRef = null;
+        let maxCount = 0;
+        for (const [styleId, count] of blockStyleCounts) {
+          if (count > maxCount) {
+            maxCount = count;
+            blockStyleRef = styleId;
+          }
+        }
+
+        blockLang = null;
+        let maxLangCount = 0;
+        for (const [lang, count] of blockLangCounts) {
+          if (count > maxLangCount) {
+            maxLangCount = count;
+            blockLang = lang;
+          }
+        }
+
+        const blockWidth = blockRight - blockLeft;
+        const blockHeight = blockBottom - blockTop;
+
+        altoOut += `<TextBlock ID="Page${pageIndex + 1}_Block${blockIndex + 1}" `;
+        altoOut += `HEIGHT="${blockHeight}" WIDTH="${blockWidth}" `;
+        altoOut += `VPOS="${blockTop}" HPOS="${blockLeft}"`;
+        if (blockLang) {
+          altoOut += ` language="${tesseractToISO6392(blockLang)}"`;
+        }
+        if (blockStyleRef) {
+          altoOut += ` STYLEREFS="${blockStyleRef}"`;
+        }
+        altoOut += '>\n';
+
+        blockIndex++;
+      }
+
+      const lineLeft = Math.round(lineObj.bbox.left);
+      const lineTop = Math.round(lineObj.bbox.top);
+      const lineRight = Math.round(lineObj.bbox.right);
+      const lineBottom = Math.round(lineObj.bbox.bottom);
+      const lineWidth = lineRight - lineLeft;
+      const lineHeight = lineBottom - lineTop;
+
+      altoOut += `<TextLine HEIGHT="${lineHeight}" WIDTH="${lineWidth}" `;
+      altoOut += `VPOS="${lineTop}" HPOS="${lineLeft}">`;
+
+      for (let wordIndex = 0; wordIndex < lineObj.words.length; wordIndex++) {
+        const wordObj = lineObj.words[wordIndex];
+
+        const wordLeft = Math.round(wordObj.bbox.left);
+        const wordTop = Math.round(wordObj.bbox.top);
+        const wordRight = Math.round(wordObj.bbox.right);
+        const wordBottom = Math.round(wordObj.bbox.bottom);
+        const wordWidth = wordRight - wordLeft;
+        const wordHeight = wordBottom - wordTop;
+        // STYLE tokens use the spellings enumerated by the ALTO schema (e.g. `italics`, `smallcaps`).
+        let styleAttr = '';
+        const styleAttrs = [];
+        if (wordObj.style.bold) styleAttrs.push('bold');
+        if (wordObj.style.italic) styleAttrs.push('italics');
+        if (wordObj.style.underline) styleAttrs.push('underline');
+        if (wordObj.style.sup) styleAttrs.push('superscript');
+        if (wordObj.style.smallCaps) styleAttrs.push('smallcaps');
+
+        if (styleAttrs.length > 0) {
+          styleAttr = ` STYLE="${styleAttrs.join(' ')}"`;
+        }
+
+        let styleRefsAttr = '';
+        if (wordObj.style.font || wordObj.style.size) {
+          const styleId = getStyleId(wordObj.style.font || '', wordObj.style.size || 0);
+          // Only add STYLEREFS if it differs from the block-level style
+          if (styleId !== blockStyleRef) {
+            styleRefsAttr = ` STYLEREFS="${styleId}"`;
+          }
+        }
+
+        let langAttr = '';
+        if (wordObj.lang) {
+          // Only add language if it differs from the block-level language
+          if (wordObj.lang !== blockLang) {
+            langAttr = ` language="${tesseractToISO6392(wordObj.lang)}"`;
+          }
+        }
+        // WC is written on a 0-1 scale, so `conf` is divided by 100.
+        let wcAttr = '';
+        if (wordObj.conf !== undefined && wordObj.conf !== null) {
+          const confNormalized = wordObj.conf / 100;
+          wcAttr = ` WC="${confNormalized.toFixed(2)}"`;
+        }
+
+        altoOut += `<String${styleAttr}${langAttr}${wcAttr}${styleRefsAttr} `;
+        altoOut += `CONTENT="${ocr.escapeXml(wordObj.text)}" `;
+        altoOut += `HEIGHT="${wordHeight}" WIDTH="${wordWidth}" `;
+        altoOut += `VPOS="${wordTop}" HPOS="${wordLeft}"/>`;
+
+        // The ALTO XML format uses explicit SP elements to denote spaces between words.
+        // While this seems redundant if we understand each <String> element to represent a word,
+        // it is encouraged by Library of Congress standards.
+        // "The use of SP and HYP are encouraged"
+        // https://www.loc.gov/ndnp/guidelines/NDNP_202628TechNotes.pdf
+        if (wordIndex < lineObj.words.length - 1) {
+          const nextWord = lineObj.words[wordIndex + 1];
+          const spaceWidth = Math.round(nextWord.bbox.left) - wordRight - 2;
+          if (spaceWidth > 0) {
+            altoOut += `<SP WIDTH="${spaceWidth}" VPOS="${wordTop}" HPOS="${wordRight + 1}"/>`;
+          }
+        }
+      }
+
+      altoOut += '</TextLine>\n';
+    }
+    // Close the last block only if one was opened; a page whose lines all have no words opens none.
+    if (blockIndex > 0) altoOut += '</TextBlock>\n';
+
+    altoOut += '</PrintSpace>\n';
+    altoOut += '</Page>\n';
+
+    opt.progressHandler({ n: pageIndex, type: 'export', info: {} });
+  }
+
+  altoOut += '</Layout>\n';
+  altoOut += '</alto>\n';
+
+  return altoOut;
+}
diff --git a/js/export/writeDocx.js b/js/export/writeDocx.js
index c0d186c..3199ef3 100644
--- a/js/export/writeDocx.js
+++ b/js/export/writeDocx.js
@@ -32,8 +32,7 @@ export function writeDocxContent({
 
     const pageObj = ocrCurrent[g];
 
-    // Do not overwrite paragraphs from Abbyy or Textract.
-    if (reflowText && (!pageObj.textSource || !['textract', 'abbyy'].includes(pageObj.textSource))) {
+    if (reflowText && (!pageObj.textSource || !['textract', 'abbyy', 'google_vision', 'azure_doc_intel', 'docx'].includes(pageObj.textSource))) {
       const angle = pageMetricsAll[g].angle || 0;
       assignParagraphs(pageObj, angle);
     }
@@ -78,6 +77,10 @@ export function writeDocxContent({
           fontStyle += '<w:vertAlign w:val="superscript"/>';
         }
 
+        if (wordObj.style.font) {
+          fontStyle += `<w:rFonts w:ascii="${ocr.escapeXml(wordObj.style.font)}" w:hAnsi="${ocr.escapeXml(wordObj.style.font)}"/>`;
+        }
+
         if (newLine || fontStyle !== fontStylePrev || (h === 0 && g === 0 && i === 0)) {
           const styleStr = fontStyle === '' ? '' : `<w:rPr>${fontStyle}</w:rPr>`;
 
diff --git a/js/export/writeText.js b/js/export/writeText.js
index f408a08..f3961bf 100644
--- a/js/export/writeText.js
+++ b/js/export/writeText.js
@@ -27,8 +27,7 @@ export function writeText({
 
     const pageObj = ocrCurrent[g];
 
-    // Do not overwrite paragraphs from Abbyy or Textract.
-    if (reflowText && (!pageObj.textSource || !['textract', 'abbyy'].includes(pageObj.textSource))) {
+    if (reflowText && (!pageObj.textSource || !['textract', 'abbyy', 'google_vision', 'azure_doc_intel', 'docx'].includes(pageObj.textSource))) {
       const angle = pageMetricsAll[g].angle || 0;
       assignParagraphs(pageObj, angle);
     }
diff --git a/js/generalWorkerMain.js b/js/generalWorkerMain.js
index 62e357e..b1a6163 100644
--- a/js/generalWorkerMain.js
+++ b/js/generalWorkerMain.js
@@ -100,6 +100,7 @@ export async function initGeneralWorker() {
     obj.convertDocAzureDocIntel = wrap('convertDocAzureDocIntel');
     obj.convertPageGoogleVision = wrap('convertPageGoogleVision');
     obj.convertPageText = wrap('convertPageText');
+    obj.convertDocDocx = wrap('convertDocDocx');
 
     obj.optimizeFont = wrap('optimizeFont');
 
@@ -212,6 +213,15 @@ export class gs {
     return gs.schedulerInner.addJob('convertDocAzureDocIntel', args);
   };
 
+  /**
+   * @param {Parameters<typeof import('./import/convertDocDocx.js').convertDocDocx>[0]} args - Arguments forwarded to the `convertDocDocx` worker job.
+   * @returns {ReturnType<typeof import('./import/convertDocDocx.js').convertDocDocx>}
+   */
+  static convertDocDocx = async (args) => {
+    await gs.getGeneralScheduler(); // NOTE(review): presumably ensures the scheduler exists before a job is queued — confirm.
+    return gs.schedulerInner.addJob('convertDocDocx', args);
+  };
+
   /**
    * @param {Parameters<typeof import('./import/convertPageGoogleVision.js').convertPageGoogleVision>[0]} args
    * @returns {ReturnType<typeof import('./import/convertPageGoogleVision.js').convertPageGoogleVision>}
diff --git a/js/global.d.ts b/js/global.d.ts
index 23cc267..9d20d81 100644
--- a/js/global.d.ts
+++ b/js/global.d.ts
@@ -13,7 +13,7 @@ declare global {
 
     // Strings representing supported sources of text.
     // `stext` indicates the text was extracted directly from a PDF using mupdf.
-    type TextSource = null | 'tesseract' | 'textract' | 'google_vision' | 'abbyy' | 'alto' | 'stext' | 'hocr' | 'text' | 'azure_doc_intel';
+    type TextSource = null | 'tesseract' | 'textract' | 'google_vision' | 'abbyy' | 'alto' | 'stext' | 'hocr' | 'text' | 'azure_doc_intel' | 'docx'; // `docx` is set on pages produced by the docx importer.
 
     type FontState = {
         enableOpt: boolean;
diff --git a/js/import/convertDocDocx.js b/js/import/convertDocDocx.js
new file mode 100644
index 0000000..1c3f44e
--- /dev/null
+++ b/js/import/convertDocDocx.js
@@ -0,0 +1,337 @@
+import ocr from '../objects/ocrObjects.js';
+import { LayoutDataTablePage } from '../objects/layoutObjects.js';
+import { calcWordCharMetrics } from '../utils/fontUtils.js';
+import { FontCont } from '../containers/fontContainer.js';
+import { unescapeXml } from '../utils/miscUtils.js';
+
+const FONT_FAMILY = 'Times New Roman'; // Font used to measure text; also the fallback family for runs without a font.
+const FONT_SIZE = 14; // Size passed to `getTextWidth` for every word and space.
+const CHAR_SPACING = 0; // Extra width added between adjacent characters of a word.
+const WORD_SPACING = 0; // Extra width added to each space character.
+const LINE_HEIGHT = 14.4; // Amount `currentY` advances after each line.
+const MARGIN_VERTICAL = 30; // Top and bottom page margin.
+const MARGIN_HORIZONTAL = 20; // Left and right page margin.
+
+/** @type {?opentype.Font} Cached opentype font for `FONT_FAMILY`; loaded on the first conversion. */
+let fontOpentype = null;
+
+/**
+ * Measures the width of a string in pixels, excluding the side bearings of its first and last glyphs.
+ * @param {string} text
+ * @param {number} size
+ * @param {opentype.Font} font
+ */
+function getTextWidth(text, size, font) {
+  const metrics = calcWordCharMetrics(text, font);
+  const advances = metrics.advanceArr;
+  const sum = (total, value) => total + value;
+  const advanceSum = advances.reduce(sum, 0);
+  const kerningSum = metrics.kerningArr.reduce(sum, 0);
+
+  // Bearings come from the outermost glyphs; a missing xMin/xMax is treated as 0.
+  const lastGlyph = font.charToGlyph(text.at(-1)).getMetrics();
+  const firstGlyph = font.charToGlyph(text[0]).getMetrics();
+  const leftBearing = firstGlyph.xMin || 0;
+  const rightBearing = advances[advances.length - 1] - (lastGlyph.xMax || 0);
+
+  // Width in font units, scaled to the requested size, plus inter-character spacing.
+  const widthUnits = advanceSum + kerningSum - leftBearing - rightBearing;
+  const pxPerUnit = size / font.unitsPerEm;
+  const spacingPx = (text.length - 1) * CHAR_SPACING;
+
+  return widthUnits * pxPerUnit + spacingPx;
+}
+
+/**
+ * Extracts the text and basic character formatting from the XML of a docx run.
+ * @param {string} runXml - XML string of a <w:r> element
+ * @returns {{text: string, styles: {bold: boolean, italic: boolean, smallCaps: boolean, underline: boolean, sup: boolean, font: string | null}}}
+ */
+function parseRunElement(runXml) {
+  // A toggle property counts as on when written as an empty element or with w:val="true" / w:val="1".
+  const hasToggle = (tag) => [
+    new RegExp(`<w:${tag}\\s*\\/>`),
+    new RegExp(`<w:${tag}\\s+w:val="true"`),
+    new RegExp(`<w:${tag}\\s+w:val="1"`),
+  ].some((re) => re.test(runXml));
+
+  const underlineSingle = /<w:u\s+w:val="single"/.test(runXml);
+  const underlineBare = /<w:u\s*\/>/.test(runXml) && !/<w:u\s+w:val="none"/.test(runXml);
+
+  // Font family: prefer w:ascii on <w:rFonts>, falling back to w:hAnsi.
+  const fontMatch = runXml.match(/<w:rFonts\s+[^>]*w:ascii="([^"]+)"/)
+    || runXml.match(/<w:rFonts\s+[^>]*w:hAnsi="([^"]+)"/);
+
+  const styles = {
+    bold: hasToggle('b'),
+    italic: hasToggle('i'),
+    smallCaps: hasToggle('smallCaps'),
+    underline: underlineSingle || underlineBare,
+    sup: /<w:vertAlign\s+w:val="superscript"/.test(runXml),
+    font: fontMatch ? unescapeXml(fontMatch[1]) : null,
+  };
+
+  // Concatenate the unescaped contents of every <w:t> element in the run.
+  const textParts = Array.from(runXml.matchAll(/<w:t[^>]*>([^<]*)<\/w:t>/g), (m) => unescapeXml(m[1]));
+  const text = textParts.join('');
+
+  return { text, styles };
+}
+
+/**
+ * Parses the paragraphs of a docx `word/document.xml` into arrays of runs; runs without text and paragraphs without such runs are dropped.
+ * @param {string} docXml - The content of word/document.xml
+ * @returns {Array<Array<{text: string, styles: {bold: boolean, italic: boolean, smallCaps: boolean, underline: boolean, sup: boolean, font: string | null}}>>}
+ */
+function parseParagraphs(docXml) {
+  const paragraphs = [];
+  // The tag name must be followed by whitespace or `>` so `<w:pPr>`, `<w:pStyle>`, etc. are not read as `<w:p>`; self-closing elements are skipped.
+  const paragraphMatches = docXml.matchAll(/<w:p(?:\s[^>]*)?(?<!\/)>(.*?)<\/w:p>/gs);
+
+  for (const parMatch of paragraphMatches) {
+    const parContent = parMatch[1];
+    const runs = [];
+    // Same for runs: a `<w:rPr>` inside `<w:pPr>` must not open a run, or paragraph-mark formatting leaks into the first run.
+    const runMatches = parContent.matchAll(/<w:r(?:\s[^>]*)?(?<!\/)>(.*?)<\/w:r>/gs);
+
+    for (const runMatch of runMatches) {
+      const runContent = runMatch[1];
+      const parsed = parseRunElement(runContent);
+
+      if (parsed.text) {
+        runs.push(parsed);
+      }
+    }
+
+    if (runs.length > 0) {
+      paragraphs.push(runs);
+    }
+  }
+
+  return paragraphs;
+}
+
+/**
+ * Converts a docx file to the internal OCR format by extracting `word/document.xml` from the archive; throws if that entry is missing.
+ * @param {Object} params
+ * @param {ArrayBuffer} params.docxData - The docx file data
+ * @param {?{width: number, height: number}} [params.pageDims] - Page dimensions. Defaults to 612 x 792 when omitted.
+ */
+export async function convertDocDocx({ docxData, pageDims = null }) {
+  const { BlobReader, BlobWriter, ZipReader } = await import('../../lib/zip.js/index.js');
+
+  const zipReader = new ZipReader(new BlobReader(new Blob([docxData])));
+
+  let documentXml;
+  try {
+    const entries = await zipReader.getEntries();
+    const documentEntry = entries.find((entry) => entry.filename === 'word/document.xml');
+    if (!documentEntry) {
+      throw new Error('No word/document.xml found in docx file');
+    }
+
+    // Read the entry into a Blob, then decode it as text.
+    const writer = new BlobWriter();
+    await documentEntry.getData(writer);
+    documentXml = await (await writer.getData()).text();
+  } finally {
+    // Release the archive reader even when the entry is missing or extraction fails.
+    await zipReader.close();
+  }
+
+  return convertDocumentXML({ documentXml, pageDims });
+}
+
+/**
+ * Lays out the text of a docx `word/document.xml` into pages of the internal OCR format, returning one `{ pageObj, dataTables }` entry per page.
+ * @param {Object} params
+ * @param {string} params.documentXml
+ * @param {?{width: number, height: number}} [params.pageDims] - Page dimensions. Defaults to 612 x 792 when omitted.
+ */
+const convertDocumentXML = async ({ documentXml, pageDims = null }) => {
+  if (!fontOpentype) {
+    fontOpentype = (await FontCont.getFont({ font: FONT_FAMILY })).opentype;
+  }
+  // Font ascender/descender converted from font units to FONT_SIZE.
+  const ASCENDER_HEIGHT = fontOpentype.ascender * (FONT_SIZE / fontOpentype.unitsPerEm);
+  const DESCENDER_HEIGHT = fontOpentype.descender * (FONT_SIZE / fontOpentype.unitsPerEm);
+
+  if (!pageDims) {
+    pageDims = { width: 612, height: 792 };
+  }
+
+  const paragraphs = parseParagraphs(documentXml);
+
+  const pagesOut = [];
+  let pageIndex = 0;
+  let pageObj = new ocr.OcrPage(pageIndex, pageDims);
+  pageObj.textSource = 'docx';
+  let tablesPage = new LayoutDataTablePage(0);
+  pagesOut.push({ pageObj, dataTables: tablesPage });
+
+  const availableWidth = pageDims.width - MARGIN_HORIZONTAL * 2;
+  let currentY = MARGIN_VERTICAL + LINE_HEIGHT / 2;
+
+  for (const paragraph of paragraphs) {
+    const parLines = [];
+    let parRight = MARGIN_HORIZONTAL;
+    let runIndex = 0;
+    let charIndexInRun = 0;
+    // Each iteration lays out one line; `runIndex` and `charIndexInRun` track how much of the paragraph has been consumed.
+    while (runIndex < paragraph.length) {
+      if (currentY + FONT_SIZE > pageDims.height - MARGIN_VERTICAL) { // Page is full: flush the partial paragraph and start a new page.
+        if (parLines.length > 0) {
+          const parBbox = {
+            left: MARGIN_HORIZONTAL,
+            top: parLines[0].bbox.top,
+            right: parRight,
+            bottom: parLines[parLines.length - 1].bbox.bottom,
+          };
+          const parObj = new ocr.OcrPar(pageObj, parBbox);
+          parObj.lines = [...parLines]; // Copy: `parLines` is emptied below and reused for the rest of the paragraph.
+          for (const ln of parLines) ln.par = parObj;
+          pageObj.pars.push(parObj);
+          parLines.length = 0;
+          parRight = MARGIN_HORIZONTAL;
+        }
+        pageIndex++;
+        const newPage = new ocr.OcrPage(pageIndex, pageDims);
+        newPage.textSource = 'docx';
+        const newTables = new LayoutDataTablePage(pageIndex);
+        pagesOut.push({ pageObj: newPage, dataTables: newTables });
+        pageObj = newPage;
+        tablesPage = newTables;
+        currentY = MARGIN_VERTICAL + LINE_HEIGHT / 2;
+      }
+
+      const baseline = [0, DESCENDER_HEIGHT];
+      const lineTop = Math.round(currentY - ASCENDER_HEIGHT);
+      const lineBottom = Math.round(currentY + DESCENDER_HEIGHT);
+
+      const lineBbox = {
+        left: MARGIN_HORIZONTAL,
+        top: lineTop,
+        right: MARGIN_HORIZONTAL,
+        bottom: lineBottom,
+      };
+      const lineObj = new ocr.OcrLine(
+        pageObj,
+        lineBbox,
+        baseline,
+        ASCENDER_HEIGHT,
+        null,
+      );
+
+      let currentX = MARGIN_HORIZONTAL;
+      let lineComplete = false;
+      let lastItemWasWhitespace = false;
+      // Consume runs, split into word and whitespace tokens, until the line is full or the paragraph ends.
+      while (runIndex < paragraph.length && !lineComplete) {
+        const run = paragraph[runIndex];
+        const remainingText = run.text.substring(charIndexInRun);
+
+        const words = remainingText.split(/(\s+)/);
+
+        for (let wordIdx = 0; wordIdx < words.length; wordIdx++) {
+          const word = words[wordIdx];
+          if (word.length === 0) continue;
+
+          const isWhitespace = /^\s+$/.test(word);
+
+          if (isWhitespace) {
+            const spaceWidth = getTextWidth(' ', FONT_SIZE, fontOpentype) + WORD_SPACING;
+            currentX += spaceWidth * word.length;
+            charIndexInRun += word.length;
+            lastItemWasWhitespace = true;
+          } else {
+            // Check if we should append to the previous word (word continues across runs)
+            // Only append if: we're at the start of a new run AND the last item was NOT whitespace
+            const lastWord = lineObj.words[lineObj.words.length - 1];
+            const shouldAppend = lastWord && wordIdx === 0 && charIndexInRun === 0 && !lastItemWasWhitespace;
+
+            if (shouldAppend) {
+              const combinedText = lastWord.text + word;
+              const combinedWidth = getTextWidth(combinedText, FONT_SIZE, fontOpentype);
+
+              if (lastWord.bbox.left + combinedWidth > MARGIN_HORIZONTAL + availableWidth) {
+                lineComplete = true;
+                break;
+              }
+
+              lastWord.text = combinedText;
+              lastWord.bbox.right = Math.round(lastWord.bbox.left + combinedWidth);
+              currentX = lastWord.bbox.right;
+              charIndexInRun += word.length;
+            } else {
+              const wordWidth = getTextWidth(word, FONT_SIZE, fontOpentype);
+
+              if (lineObj.words.length > 0 && currentX + wordWidth > MARGIN_HORIZONTAL + availableWidth) {
+                lineComplete = true;
+                break;
+              }
+
+              const wordBbox = {
+                left: Math.round(currentX),
+                top: lineTop,
+                right: Math.round(currentX + wordWidth),
+                bottom: lineBottom,
+              };
+              const wordId = `word_${pageIndex + 1}_${pageObj.lines.length + 1}_${lineObj.words.length + 1}`;
+              const wordObj = new ocr.OcrWord(lineObj, wordId, word, wordBbox);
+              wordObj.conf = 100;
+              wordObj.style.font = run.styles.font || FONT_FAMILY;
+
+              wordObj.style.bold = run.styles.bold;
+              wordObj.style.italic = run.styles.italic;
+              wordObj.style.smallCaps = run.styles.smallCaps;
+              wordObj.style.underline = run.styles.underline;
+              wordObj.style.sup = run.styles.sup;
+
+              lineObj.words.push(wordObj);
+              currentX += wordWidth;
+              charIndexInRun += word.length;
+            }
+            lastItemWasWhitespace = false;
+          }
+        }
+
+        if (charIndexInRun >= run.text.length) {
+          runIndex++;
+          charIndexInRun = 0;
+        }
+
+        if (lineComplete) break;
+      }
+
+      if (lineObj.words.length > 0) {
+        lineObj.bbox = {
+          left: lineObj.words[0].bbox.left,
+          top: lineTop,
+          right: lineObj.words[lineObj.words.length - 1].bbox.right,
+          bottom: lineBottom,
+        };
+
+        pageObj.lines.push(lineObj);
+        parLines.push(lineObj);
+        parRight = Math.max(parRight, lineObj.bbox.right);
+      }
+
+      currentY += LINE_HEIGHT;
+    }
+
+    if (parLines.length > 0) {
+      const parBbox = {
+        left: MARGIN_HORIZONTAL,
+        top: parLines[0].bbox.top,
+        right: parRight,
+        bottom: parLines[parLines.length - 1].bbox.bottom,
+      };
+      const parObj = new ocr.OcrPar(pageObj, parBbox);
+      parObj.lines = parLines;
+      for (const ln of parLines) ln.par = parObj;
+      pageObj.pars.push(parObj);
+    }
+  }
+
+  return pagesOut;
+};
diff --git a/js/import/convertPageAbbyy.js b/js/import/convertPageAbbyy.js
index 75c9933..b14765f 100644
--- a/js/import/convertPageAbbyy.js
+++ b/js/import/convertPageAbbyy.js
@@ -85,9 +85,6 @@ export async function convertPageAbbyy({ ocrStr, n }) {
 
     /**
      * Convert Abbyy XML paragraph to internal format.
-     * Note that Abbyy XML paragraphs are not preserved because paragraphs are re-assigned by the `assignParagraphs` function.
-     * Even if this function call was skipped in the code, when saving/restoring the state using .scribe files, paragraph data is not saved.
-     * Further development would be needed to preserve paragraph data.
      * @param {string} xmlPar
      */
     function convertParAbbyy(xmlPar) {
diff --git a/js/import/convertPageAlto.js b/js/import/convertPageAlto.js
index ed40332..65b32ab 100644
--- a/js/import/convertPageAlto.js
+++ b/js/import/convertPageAlto.js
@@ -1,6 +1,7 @@
 import ocr from '../objects/ocrObjects.js';
 
 import {
+  calcBboxUnion,
   unescapeXml,
 } from '../utils/miscUtils.js';
 
@@ -115,7 +116,6 @@ export async function convertPageAlto({ ocrStr, n }) {
         wordObj.conf = Math.round(parseFloat(wcStr) * 100);
       }
 
-      // Parse style attributes
       const styleAttr = getAttr(contentMatch, 'STYLE');
       if (styleAttr) {
         if (/bold/i.test(styleAttr)) wordObj.style.bold = true;
@@ -129,7 +129,6 @@ export async function convertPageAlto({ ocrStr, n }) {
       // Use String's STYLEREFS first, fall back to TextBlock's STYLEREFS
       const styleRefs = getAttr(contentMatch, 'STYLEREFS') || blockStyleRefs;
       if (styleRefs) {
-        // Look up the TextStyle definition in the document
         const styleRegex = new RegExp(`<TextStyle\\s*ID=["']${styleRefs}["'][^>]*>`, 'i');
         const styleMatch = ocrStr.match(styleRegex);
         if (styleMatch) {
@@ -161,9 +160,28 @@ export async function convertPageAlto({ ocrStr, n }) {
     const blockStyleRefs = blockTag ? getAttr(blockTag, 'STYLEREFS') : null;
     const blockContent = blockMatch[1];
 
+    /** @type {Array<OcrLine>} */
+    const parLineArr = [];
+
     const textLinesInBlock = [...blockContent.matchAll(textLineRegex)];
     for (const lineMatch of textLinesInBlock) {
+      const lineCountBefore = pageObj.lines.length;
       convertLine(lineMatch[0], blockStyleRefs);
+      if (pageObj.lines.length > lineCountBefore) {
+        parLineArr.push(pageObj.lines[pageObj.lines.length - 1]);
+      }
+    }
+
+    if (parLineArr.length > 0) {
+      const parbox = calcBboxUnion(parLineArr.map((x) => x.bbox));
+      const parObj = new ocr.OcrPar(pageObj, parbox);
+
+      parLineArr.forEach((x) => {
+        x.par = parObj;
+      });
+
+      parObj.lines = parLineArr;
+      pageObj.pars.push(parObj);
     }
   }
 
diff --git a/js/import/import.js b/js/import/import.js
index efbb0ea..686bfd0 100644
--- a/js/import/import.js
+++ b/js/import/import.js
@@ -104,7 +104,7 @@ export async function sortInputFiles(files) {
     if (['png', 'jpeg', 'jpg'].includes(fileExt)) {
       imageFilesAll.push(file);
       // All .gz files are assumed to be OCR data (xml) since all other file types can be compressed already
-    } else if (['hocr', 'xml', 'html', 'gz', 'stext', 'json', 'txt'].includes(fileExt)) {
+    } else if (['hocr', 'xml', 'html', 'gz', 'stext', 'json', 'txt', 'docx'].includes(fileExt)) {
       ocrFilesAll.push(file);
     } else if (['scribe'].includes(fileExt)) {
       scribeFilesAll.push(file);
@@ -400,7 +400,7 @@ export async function importFiles(files) {
     format = /** @type {("hocr" | "abbyy" | "alto" | "stext" | "textract" | "text")} */ (ocrData.format);
 
     // The text import function requires built-in fonts to be loaded.
-    if (format === 'text') {
+    if (['text', 'docx'].includes(format)) {
       await loadBuiltInFontsRaw();
     }
 
diff --git a/js/import/importOCR.js b/js/import/importOCR.js
index 5a2b197..e287d3d 100644
--- a/js/import/importOCR.js
+++ b/js/import/importOCR.js
@@ -67,6 +67,10 @@ const detectOcrFormat = (ocrStr, ext) => {
     return 'text';
   }
 
+  if (ext && ext.toLowerCase() === 'docx') {
+    return 'docx';
+  }
+
   return null;
 };
 
@@ -98,9 +102,15 @@ export async function importOCRFiles(ocrFilesAll) {
   let serifFont;
 
   if (singleHOCRMode) {
-    const hocrStrAll = await readOcrFile(ocrFilesAll[0]);
-
-    format = detectOcrFormat(hocrStrAll, ocrFilesAll[0]?.name?.split('.').pop());
+    const fileExt = ocrFilesAll[0]?.name?.split('.').pop();
+    let ocrFilesContent;
+    if (fileExt === 'docx') {
+      ocrFilesContent = await ocrFilesAll[0].arrayBuffer();
+      format = 'docx';
+    } else {
+      ocrFilesContent = await readOcrFile(ocrFilesAll[0]);
+      format = detectOcrFormat(ocrFilesContent, fileExt);
+    }
 
     if (!format) {
       console.error(ocrFilesAll[0]);
@@ -108,37 +118,40 @@ export async function importOCRFiles(ocrFilesAll) {
     }
 
     if (format === 'textract') {
-      hocrRaw = [hocrStrAll];
+      hocrRaw = [ocrFilesContent];
     } else if (format === 'google_vision') {
-      hocrRaw = [hocrStrAll];
-      if (hocrStrAll.substring(0, 500).includes('"responses"')) {
-        const responses = JSON.parse(hocrStrAll).responses;
+      hocrRaw = [ocrFilesContent];
+      if (ocrFilesContent.substring(0, 500).includes('"responses"')) {
+        const responses = JSON.parse(ocrFilesContent).responses;
         hocrRaw = responses
           .sort((a, b) => a.context.pageNumber - b.context.pageNumber)
           .map((resp) => JSON.stringify(resp));
       }
     } else if (format === 'azure_doc_intel') {
-      hocrRaw = [hocrStrAll];
+      hocrRaw = [ocrFilesContent];
     } else if (format === 'alto') {
       // Extract the Styles section to prepend to each page
-      const stylesMatch = hocrStrAll.match(/<Styles>[\s\S]*?<\/Styles>/i);
+      const stylesMatch = ocrFilesContent.match(/<Styles>[\s\S]*?<\/Styles>/i);
       const stylesSection = stylesMatch ? stylesMatch[0] : '';
 
       // Split by Page elements
-      const pages = hocrStrAll.split(/(?=<Page\s)/).slice(1);
+      const pages = ocrFilesContent.split(/(?=<Page\s)/).slice(1);
 
       // Prepend Styles section to each page so font lookups work
       hocrRaw = pages.map((page) => stylesSection + page);
     } else if (format === 'abbyy') {
-      hocrRaw = hocrStrAll.split(/(?=<page)/).slice(1);
+      hocrRaw = ocrFilesContent.split(/(?=<page)/).slice(1);
     } else if (format === 'stext') {
-      hocrRaw = hocrStrAll.split(/(?=<page)/).slice(1);
+      hocrRaw = ocrFilesContent.split(/(?=<page)/).slice(1);
     } else if (format === 'text') {
-      hocrRaw = [hocrStrAll];
+      hocrRaw = [ocrFilesContent];
+    } else if (format === 'docx') {
+      // For .docx, pass the full file contents to the read function.
+      hocrRaw = [ocrFilesContent];
     } else if (format === 'hocr') {
       // `hocrStrStart` will be missing for individual HOCR pages created with Tesseract.js or the Tesseract API.
-      hocrStrStart = hocrStrAll.match(/[\s\S]*?<body>/)?.[0];
-      hocrRaw = splitHOCRStr(hocrStrAll);
+      hocrStrStart = ocrFilesContent.match(/[\s\S]*?<body>/)?.[0];
+      hocrRaw = splitHOCRStr(ocrFilesContent);
     }
 
     pageCountHOCR = hocrRaw.length;
diff --git a/js/recognizeConvert.js b/js/recognizeConvert.js
index 142ea66..d5e69be 100644
--- a/js/recognizeConvert.js
+++ b/js/recognizeConvert.js
@@ -314,6 +314,9 @@ export async function convertOCRPage(ocrRaw, n, mainData, format, engineName, sc
     res = await gs.convertPageStext({ ocrStr: ocrRaw, n });
   } else if (format === 'text') {
     res = await gs.convertPageText({ textStr: ocrRaw });
+  } else if (format === 'docx') {
+    console.error('format does not support page-level import.');
+    // res = await gs.convertDocDocx({ docxData: ocrRaw });
   } else {
     throw new Error(`Invalid format: ${format}`);
   }
@@ -421,6 +424,25 @@ export async function convertOCR(ocrRawArr, mainData, format, engineName, scribe
     return;
   }
 
+  if (format === 'docx') {
+    const res = await gs.convertDocDocx({ docxData: ocrRawArr[0] });
+
+    if (res.length > inputData.pageCount) inputData.pageCount = res.length;
+
+    for (let i = 0; i < res.length; i++) {
+      if (!layoutRegions.pages[i]) layoutRegions.pages[i] = new LayoutPage(i);
+    }
+
+    for (let i = 0; i < res.length; i++) {
+      if (!layoutDataTables.pages[i]) layoutDataTables.pages[i] = new LayoutDataTablePage(i);
+    }
+
+    for (let n = 0; n < res.length; n++) {
+      await convertPageCallback(res[n], n, mainData, engineName);
+    }
+    return;
+  }
+
   for (let n = 0; n < ocrRawArr.length; n++) {
     promiseArr.push(convertOCRPage(ocrRawArr[n], n, mainData, format, engineName, scribeMode));
   }
diff --git a/js/worker/generalWorker.js b/js/worker/generalWorker.js
index 9dacda9..1d93d2e 100644
--- a/js/worker/generalWorker.js
+++ b/js/worker/generalWorker.js
@@ -7,6 +7,7 @@ import { convertDocTextract } from '../import/convertDocTextract.js';
 import { convertDocAzureDocIntel } from '../import/convertDocAzureDocIntel.js';
 import { convertPageGoogleVision } from '../import/convertPageGoogleVision.js';
 import { convertPageText } from '../import/convertPageText.js';
+import { convertDocDocx } from '../import/convertDocDocx.js';
 
 import { FontCont, loadFontsFromSource } from '../containers/fontContainer.js';
 import {
@@ -409,6 +410,7 @@ const handleMessage = async (data) => {
     convertPageGoogleVision,
     convertPageBlocks,
     convertPageText,
+    convertDocDocx,
 
     // Optimize font functions
     optimizeFont,
diff --git a/tests/assets/simple_paragraph.alto.xml b/tests/assets/simple_paragraph.alto.xml
new file mode 100644
index 0000000..272283e
--- /dev/null
+++ b/tests/assets/simple_paragraph.alto.xml
@@ -0,0 +1,39 @@
+﻿<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<alto xmlns="http://www.loc.gov/standards/alto/ns-v2#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd">
+<Description>
+<MeasurementUnit>pixel</MeasurementUnit>
+<OCRProcessing ID="IdOcr"><ocrProcessingStep><processingDateTime>2025-12-06</processingDateTime><processingSoftware><softwareCreator>ABBYY</softwareCreator><softwareName>ABBYY FineReader Engine</softwareName><softwareVersion>12</softwareVersion></processingSoftware></ocrProcessingStep></OCRProcessing>
+</Description>
+<Styles><TextStyle ID="font0" FONTFAMILY="Times New Roman" FONTSIZE="24"/>
+</Styles>
+<Layout>
+<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="529" WIDTH="1207">
+<TopMargin HEIGHT="8" WIDTH="1207" VPOS="0" HPOS="0">
+</TopMargin>
+<LeftMargin HEIGHT="508" WIDTH="21" VPOS="8" HPOS="0">
+</LeftMargin>
+<RightMargin HEIGHT="508" WIDTH="23" VPOS="8" HPOS="1184">
+</RightMargin>
+<BottomMargin HEIGHT="13" WIDTH="1207" VPOS="516" HPOS="0">
+</BottomMargin>
+<PrintSpace HEIGHT="508" WIDTH="1163" VPOS="8" HPOS="21">
+<TextBlock ID="Page1_Block1" HEIGHT="503" WIDTH="1161" VPOS="13" HPOS="22" language="en-US" STYLEREFS="font0">
+<TextLine HEIGHT="31" WIDTH="1153" VPOS="15" HPOS="25"><String WC="0.88999998569488525" CONTENT="JNJ" HEIGHT="23" WIDTH="47" VPOS="16" HPOS="25"/><SP WIDTH="11" VPOS="16" HPOS="73"/><String WC="0.60777777433395386" CONTENT="announced" HEIGHT="23" WIDTH="140" VPOS="16" HPOS="85"/><SP WIDTH="11" VPOS="16" HPOS="226"/><String WC="0.55250000953674316" CONTENT="this" HEIGHT="23" WIDTH="44" VPOS="16" HPOS="238"/><SP WIDTH="11" VPOS="22" HPOS="283"/><String WC="0.69571429491043091" CONTENT="morning" HEIGHT="30" WIDTH="110" VPOS="16" HPOS="295"/><SP WIDTH="9" VPOS="19" HPOS="406"/><String WC="0.6600000262260437" CONTENT="the" HEIGHT="23" WIDTH="38" VPOS="16" HPOS="416"/><SP WIDTH="10" VPOS="23" HPOS="455"/><String WC="0.8054545521736145" CONTENT="acquisition" HEIGHT="30" WIDTH="142" VPOS="16" HPOS="466"/><SP WIDTH="10" VPOS="23" HPOS="609"/><String WC="0.66500002145767212" CONTENT="of" HEIGHT="23" WIDTH="29" VPOS="16" HPOS="620"/><SP WIDTH="7" VPOS="16" HPOS="650"/><String WC="0.85333335399627686" CONTENT="privately-he" HEIGHT="30" WIDTH="156" VPOS="16" HPOS="658"/><SP WIDTH="1" VPOS="16" HPOS="815"/><String WC="1" CONTENT="id" HEIGHT="23" WIDTH="23" VPOS="16" HPOS="817"/><SP WIDTH="11" VPOS="16" HPOS="841"/><String WC="0.90666669607162476" CONTENT="Aragon" HEIGHT="30" WIDTH="97" VPOS="16" HPOS="853"/><SP WIDTH="8" VPOS="16" HPOS="951"/><String WC="0.81999999284744263" CONTENT="for" HEIGHT="23" WIDTH="39" VPOS="16" HPOS="960"/><SP WIDTH="9" VPOS="15" HPOS="1000"/><String WC="0.73250001668930054" CONTENT="$650" HEIGHT="26" WIDTH="63" VPOS="15" HPOS="1010"/><SP WIDTH="11" VPOS="16" HPOS="1074"/><String WC="0.77285712957382202" CONTENT="million" HEIGHT="23" WIDTH="92" VPOS="16" HPOS="1086"/></TextLine>
+<TextLine HEIGHT="31" WIDTH="1154" VPOS="51" HPOS="24"><String WC="1" CONTENT="with" HEIGHT="23" WIDTH="59" VPOS="52" HPOS="24"/><SP WIDTH="10" VPOS="52" HPOS="84"/><String WC="1" CONTENT="a" HEIGHT="16" WIDTH="13" VPOS="59" HPOS="95"/><SP WIDTH="11" VPOS="51" HPOS="109"/><String WC="0.70249998569488525" CONTENT="$350" HEIGHT="26" WIDTH="63" VPOS="51" HPOS="121"/><SP WIDTH="12" VPOS="52" HPOS="185"/><String WC="0.87000000476837158" CONTENT="million" HEIGHT="23" WIDTH="94" VPOS="52" HPOS="198"/><SP WIDTH="9" VPOS="59" HPOS="293"/><String WC="0.86400002241134644" CONTENT="contingent" HEIGHT="30" WIDTH="138" VPOS="52" HPOS="303"/><SP WIDTH="10" VPOS="52" HPOS="442"/><String WC="0.89818179607391357" CONTENT="development" HEIGHT="30" WIDTH="168" VPOS="52" HPOS="453"/><SP WIDTH="11" VPOS="55" HPOS="622"/><String WC="0.86100000143051147" CONTENT="milestone." HEIGHT="24" WIDTH="131" VPOS="52" HPOS="634"/><SP WIDTH="13" VPOS="52" HPOS="766"/><String WC="1" CONTENT="As" HEIGHT="24" WIDTH="35" VPOS="52" HPOS="780"/><SP WIDTH="11" VPOS="59" HPOS="816"/><String WC="1" CONTENT="a" HEIGHT="17" WIDTH="13" VPOS="59" HPOS="828"/><SP WIDTH="12" VPOS="59" HPOS="842"/><String WC="0.8355555534362793" CONTENT="reminder," HEIGHT="28" WIDTH="123" VPOS="52" HPOS="855"/><SP WIDTH="12" VPOS="52" HPOS="979"/><String WC="0.86250001192092896" CONTENT="Aragon’s" HEIGHT="30" WIDTH="120" VPOS="52" HPOS="992"/><SP WIDTH="12" VPOS="52" HPOS="1113"/><String WC="1" CONTENT="lead" HEIGHT="23" WIDTH="52" VPOS="52" HPOS="1126"/></TextLine>
+<TextLine HEIGHT="38" WIDTH="1155" VPOS="80" HPOS="25"><String WC="0.90222221612930298" CONTENT="compound." HEIGHT="30" WIDTH="143" VPOS="88" HPOS="25"/><SP WIDTH="12" VPOS="88" HPOS="169"/><String WC="0.82499998807907104" CONTENT="ARN-509," HEIGHT="29" WIDTH="135" VPOS="88" HPOS="182"/><SP WIDTH="11" VPOS="88" HPOS="318"/><String WC="1" CONTENT="is" HEIGHT="24" WIDTH="20" VPOS="88" HPOS="330"/><SP WIDTH="10" VPOS="95" HPOS="351"/><String WC="0.90333330631256104" CONTENT="viewed" HEIGHT="24" WIDTH="93" VPOS="88" HPOS="362"/><SP WIDTH="11" VPOS="88" HPOS="456"/><String WC="1" CONTENT="as" HEIGHT="17" WIDTH="26" VPOS="95" HPOS="468"/><SP WIDTH="10" VPOS="95" HPOS="495"/><String WC="1" CONTENT="a" HEIGHT="17" WIDTH="14" VPOS="95" HPOS="506"/><SP WIDTH="10" VPOS="88" HPOS="521"/><String WC="0.74166667461395264" CONTENT="direct" HEIGHT="24" WIDTH="74" VPOS="88" HPOS="532"/><SP WIDTH="9" VPOS="89" HPOS="607"/><String WC="0.89999997615814209" CONTENT="2" HEIGHT="22" WIDTH="14" VPOS="89" HPOS="617"/><String STYLE="superscript" WC="0.31000000238418579" CONTENT="nd" HEIGHT="16" WIDTH="22" VPOS="80" HPOS="633"/><SP WIDTH="11" VPOS="80" HPOS="656"/><String WC="0.88899999856948853" CONTENT="generation" HEIGHT="30" WIDTH="138" VPOS="88" HPOS="668"/><SP WIDTH="8" VPOS="95" HPOS="807"/><String WC="0.8787500262260437" CONTENT="androgen" HEIGHT="30" WIDTH="122" VPOS="88" HPOS="816"/><SP WIDTH="10" VPOS="95" HPOS="939"/><String WC="0.89375001192092896" CONTENT="receptor" HEIGHT="26" WIDTH="106" VPOS="92" HPOS="950"/><SP WIDTH="9" VPOS="88" HPOS="1057"/><String WC="0.73333334922790527" CONTENT="inhibitor" HEIGHT="25" WIDTH="113" VPOS="87" HPOS="1067"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1153" VPOS="124" HPOS="25"><String WC="0.94900000095367432" CONTENT="competitor" HEIGHT="30" WIDTH="142" VPOS="124" HPOS="25"/><SP WIDTH="11" VPOS="127" HPOS="168"/><String WC="0.82999998331069946" CONTENT="to" HEIGHT="21" WIDTH="23" VPOS="127" HPOS="180"/><SP WIDTH="16" VPOS="125" HPOS="204"/><String WC="1" CONTENT="Xtandi." HEIGHT="24" WIDTH="89" VPOS="124" HPOS="221"/><SP WIDTH="18" VPOS="124" HPOS="311"/><String WC="0.90200001001358032" CONTENT="Also," HEIGHT="25" WIDTH="68" VPOS="124" HPOS="330"/><SP WIDTH="13" VPOS="131" HPOS="399"/><String WC="0.78666669130325317" CONTENT="recall" HEIGHT="24" WIDTH="71" VPOS="124" HPOS="413"/><SP WIDTH="13" VPOS="124" HPOS="485"/><String WC="0.64999997615814209" CONTENT="that" HEIGHT="24" WIDTH="49" VPOS="124" HPOS="499"/><SP WIDTH="12" VPOS="124" HPOS="549"/><String WC="1" CONTENT="MDVN" HEIGHT="24" WIDTH="96" VPOS="124" HPOS="562"/><SP WIDTH="16" VPOS="124" HPOS="659"/><String WC="1" CONTENT="is" HEIGHT="24" WIDTH="20" VPOS="124" HPOS="676"/><SP WIDTH="14" VPOS="131" HPOS="697"/><String WC="0.85399997234344482" CONTENT="suing" HEIGHT="30" WIDTH="69" VPOS="124" HPOS="712"/><SP WIDTH="13" VPOS="124" HPOS="782"/><String WC="0.91166669130325317" CONTENT="Aragon" HEIGHT="30" WIDTH="97" VPOS="124" HPOS="796"/><SP WIDTH="12" VPOS="131" HPOS="894"/><String WC="1" CONTENT="as" HEIGHT="17" WIDTH="26" VPOS="131" HPOS="907"/><SP WIDTH="13" VPOS="124" HPOS="934"/><String WC="0.70999997854232788" CONTENT="it" HEIGHT="23" WIDTH="18" VPOS="124" HPOS="948"/><SP WIDTH="12" VPOS="128" HPOS="967"/><String WC="0.99333333969116211" CONTENT="claims" HEIGHT="24" WIDTH="84" VPOS="124" HPOS="980"/><SP WIDTH="14" VPOS="128" HPOS="1065"/><String WC="0.68500000238418579" CONTENT="to" HEIGHT="20" WIDTH="22" VPOS="128" HPOS="1080"/><SP WIDTH="15" VPOS="124" HPOS="1103"/><String WC="0.91750001907348633" CONTENT="have" HEIGHT="24" WIDTH="59" VPOS="124" HPOS="1119"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1152" VPOS="159" HPOS="25"><String WC="0.72333335876464844" CONTENT="rights" HEIGHT="30" WIDTH="73" VPOS="159" HPOS="25"/><SP WIDTH="14" VPOS="163" HPOS="99"/><String WC="0.66500002145767212" CONTENT="to" HEIGHT="20" WIDTH="23" VPOS="163" HPOS="114"/><SP WIDTH="15" VPOS="160" HPOS="138"/><String WC="0.81875002384185791" CONTENT="ARN-509." HEIGHT="23" WIDTH="133" VPOS="160" HPOS="154"/><SP WIDTH="16" VPOS="160" HPOS="288"/><String WC="0.64999997615814209" CONTENT="Having" HEIGHT="29" WIDTH="95" VPOS="160" HPOS="305"/><SP WIDTH="14" VPOS="159" HPOS="401"/><String WC="0.72500002384185791" CONTENT="lost" HEIGHT="24" WIDTH="46" VPOS="159" HPOS="416"/><SP WIDTH="14" VPOS="163" HPOS="463"/><String WC="0.78666669130325317" CONTENT="the" HEIGHT="24" WIDTH="37" VPOS="159" HPOS="478"/><SP WIDTH="15" VPOS="159" HPOS="516"/><String WC="0.75199997425079346" CONTENT="first" HEIGHT="24" WIDTH="52" VPOS="159" HPOS="532"/><SP WIDTH="14" VPOS="163" HPOS="585"/><String WC="0.88999998569488525" CONTENT="round" HEIGHT="24" WIDTH="74" VPOS="159" HPOS="600"/><SP WIDTH="13" VPOS="159" HPOS="675"/><String WC="1" CONTENT="of" HEIGHT="24" WIDTH="29" VPOS="159" HPOS="689"/><SP WIDTH="11" VPOS="159" HPOS="719"/><String WC="0.8841666579246521" CONTENT="proceedings." HEIGHT="30" WIDTH="164" VPOS="159" HPOS="731"/><SP WIDTH="14" VPOS="160" HPOS="896"/><String WC="0.70749998092651367" CONTENT="MDVN" HEIGHT="22" WIDTH="97" VPOS="160" HPOS="911"/><SP WIDTH="16" VPOS="159" HPOS="1009"/><String WC="1" CONTENT="is" HEIGHT="24" WIDTH="20" VPOS="159" HPOS="1026"/><SP WIDTH="14" VPOS="166" HPOS="1047"/><String WC="0.77666664123535156" CONTENT="currently" HEIGHT="30" WIDTH="115" VPOS="159" HPOS="1062"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1153" VPOS="195" HPOS="25"><String WC="0.7822222113609314" CONTENT="appealing" HEIGHT="30" WIDTH="126" VPOS="195" HPOS="25"/><SP WIDTH="16" VPOS="199" HPOS="152"/><String WC="0.69999998807907104" CONTENT="the" HEIGHT="24" WIDTH="39" VPOS="195" HPOS="169"/><SP WIDTH="17" VPOS="196" HPOS="209"/><String WC="0.88499999046325684" CONTENT="December" HEIGHT="24" WIDTH="134" VPOS="195" HPOS="227"/><SP WIDTH="16" VPOS="196" HPOS="362"/><String WC="0.95249998569488525" CONTENT="2012" HEIGHT="22" WIDTH="63" VPOS="196" HPOS="379"/><SP WIDTH="18" VPOS="196" HPOS="443"/><String WC="0.84777778387069702" CONTENT="decision." HEIGHT="24" WIDTH="114" VPOS="195" HPOS="462"/><SP WIDTH="19" VPOS="196" HPOS="577"/><String WC="0.81000000238418579" CONTENT="The" HEIGHT="24" WIDTH="48" VPOS="195" HPOS="597"/><SP WIDTH="17" VPOS="202" HPOS="646"/><String WC="0.8787500262260437" CONTENT="weakness" HEIGHT="24" WIDTH="125" VPOS="195" HPOS="664"/><SP WIDTH="18" VPOS="195" HPOS="790"/><String WC="0.64999997615814209" CONTENT="in" HEIGHT="23" WIDTH="24" VPOS="195" HPOS="809"/><SP WIDTH="16" VPOS="196" HPOS="834"/><String WC="1" CONTENT="MDVN" HEIGHT="22" WIDTH="98" VPOS="196" HPOS="851"/><SP WIDTH="18" VPOS="196" HPOS="950"/><String WC="0.8399999737739563" CONTENT="shares" HEIGHT="24" WIDTH="80" VPOS="195" HPOS="969"/><SP WIDTH="17" VPOS="199" HPOS="1050"/><String WC="0.77999997138977051" CONTENT="today" HEIGHT="29" WIDTH="70" VPOS="196" HPOS="1068"/><SP WIDTH="19" VPOS="195" HPOS="1139"/><String WC="1" CONTENT="is" HEIGHT="24" WIDTH="19" VPOS="195" HPOS="1159"/></TextLine>
+<TextLine HEIGHT="31" WIDTH="1155" VPOS="231" HPOS="24"><String WC="0.77714288234710693" CONTENT="because" HEIGHT="23" WIDTH="102" VPOS="232" HPOS="24"/><SP WIDTH="11" VPOS="232" HPOS="127"/><String WC="1" CONTENT="JNJ" HEIGHT="23" WIDTH="47" VPOS="232" HPOS="139"/><SP WIDTH="11" VPOS="231" HPOS="187"/><String WC="0.6966666579246521" CONTENT="has" HEIGHT="24" WIDTH="41" VPOS="231" HPOS="199"/><SP WIDTH="11" VPOS="238" HPOS="241"/><String WC="0.72000002861022949" CONTENT="strengthened" HEIGHT="30" WIDTH="165" VPOS="231" HPOS="253"/><SP WIDTH="10" VPOS="231" HPOS="419"/><String WC="0.81999999284744263" CONTENT="its" HEIGHT="24" WIDTH="29" VPOS="231" HPOS="430"/><SP WIDTH="10" VPOS="238" HPOS="460"/><String WC="0.84375" CONTENT="strategy" HEIGHT="26" WIDTH="100" VPOS="235" HPOS="471"/><SP WIDTH="12" VPOS="231" HPOS="572"/><String WC="0.62999999523162842" CONTENT="in" HEIGHT="23" WIDTH="25" VPOS="231" HPOS="585"/><SP WIDTH="8" VPOS="238" HPOS="611"/><String WC="0.79750001430511475" CONTENT="prostate" HEIGHT="26" WIDTH="102" VPOS="235" HPOS="620"/><SP WIDTH="10" VPOS="238" HPOS="723"/><String WC="0.61166667938232422" CONTENT="cancer" HEIGHT="17" WIDTH="85" VPOS="238" HPOS="734"/><SP WIDTH="8" VPOS="238" HPOS="820"/><String WC="0.70200002193450928" CONTENT="ahead" HEIGHT="24" WIDTH="76" VPOS="231" HPOS="829"/><SP WIDTH="10" VPOS="231" HPOS="906"/><String WC="1" CONTENT="of" HEIGHT="24" WIDTH="29" VPOS="231" HPOS="917"/><SP WIDTH="7" VPOS="231" HPOS="947"/><String WC="0.63666665554046631" CONTENT="the" HEIGHT="24" WIDTH="37" VPOS="231" HPOS="955"/><SP WIDTH="10" VPOS="232" HPOS="993"/><String WC="0.88499999046325684" CONTENT="Zytiga" HEIGHT="31" WIDTH="85" VPOS="231" HPOS="1004"/><SP WIDTH="9" VPOS="238" HPOS="1090"/><String WC="0.91333335638046265" CONTENT="patent" HEIGHT="26" WIDTH="79" VPOS="235" HPOS="1100"/></TextLine>
+<TextLine HEIGHT="31" WIDTH="1151" VPOS="267" HPOS="27"><String WC="1" CONTENT="expiry" HEIGHT="31" WIDTH="76" VPOS="267" HPOS="27"/><SP WIDTH="15" VPOS="268" HPOS="104"/><String WC="0.44499999284744263" CONTENT="in" HEIGHT="22" WIDTH="25" VPOS="268" HPOS="120"/><SP WIDTH="10" VPOS="268" HPOS="146"/><String WC="0.82400000095367432" CONTENT="2014." HEIGHT="24" WIDTH="71" VPOS="268" HPOS="157"/><SP WIDTH="12" VPOS="274" HPOS="229"/><String WC="0.68999999761581421" CONTENT="and" HEIGHT="23" WIDTH="46" VPOS="268" HPOS="242"/><SP WIDTH="11" VPOS="268" HPOS="289"/><String WC="0.79500001668930054" CONTENT="as" HEIGHT="17" WIDTH="25" VPOS="274" HPOS="301"/><SP WIDTH="12" VPOS="268" HPOS="327"/><String WC="0.79500001668930054" CONTENT="it" HEIGHT="23" WIDTH="17" VPOS="268" HPOS="340"/><SP WIDTH="11" VPOS="271" HPOS="358"/><String WC="1" CONTENT="may" HEIGHT="23" WIDTH="51" VPOS="274" HPOS="370"/><SP WIDTH="14" VPOS="267" HPOS="422"/><String WC="1" CONTENT="be" HEIGHT="24" WIDTH="30" VPOS="267" HPOS="437"/><SP WIDTH="10" VPOS="274" HPOS="468"/><String WC="0.91111111640930176" CONTENT="perceived" HEIGHT="30" WIDTH="127" VPOS="267" HPOS="479"/><SP WIDTH="10" VPOS="268" HPOS="607"/><String WC="0.60250002145767212" CONTENT="that" HEIGHT="23" WIDTH="49" VPOS="268" HPOS="618"/><SP WIDTH="10" VPOS="268" HPOS="668"/><String WC="0.97333335876464844" CONTENT="MDVN’s" HEIGHT="25" WIDTH="122" VPOS="267" HPOS="679"/><SP WIDTH="11" VPOS="267" HPOS="802"/><String WC="1" CONTENT="legal" HEIGHT="30" WIDTH="62" VPOS="267" HPOS="814"/><SP WIDTH="11" VPOS="267" HPOS="877"/><String WC="0.87166666984558105" CONTENT="claims" HEIGHT="23" WIDTH="84" VPOS="268" HPOS="889"/><SP WIDTH="12" VPOS="274" HPOS="974"/><String WC="0.84500002861022949" CONTENT="on" HEIGHT="16" WIDTH="31" VPOS="275" HPOS="987"/><SP WIDTH="11" VPOS="268" HPOS="1019"/><String WC="0.87833333015441895" CONTENT="Aragon" HEIGHT="29" WIDTH="97" VPOS="268" HPOS="1031"/><SP WIDTH="10" VPOS="274" HPOS="1129"/><String 
WC="0.60000002384185791" CONTENT="arc" HEIGHT="17" WIDTH="38" VPOS="274" HPOS="1140"/></TextLine>
+<TextLine HEIGHT="31" WIDTH="1154" VPOS="303" HPOS="24"><String WC="0.94199997186660767" CONTENT="weak." HEIGHT="24" WIDTH="75" VPOS="303" HPOS="24"/><SP WIDTH="11" VPOS="304" HPOS="100"/><String WC="1" CONTENT="We" HEIGHT="23" WIDTH="43" VPOS="304" HPOS="112"/><SP WIDTH="10" VPOS="310" HPOS="156"/><String WC="0.95142859220504761" CONTENT="suggest" HEIGHT="27" WIDTH="96" VPOS="307" HPOS="167"/><SP WIDTH="7" VPOS="304" HPOS="264"/><String WC="0.91166669130325317" CONTENT="buying" HEIGHT="30" WIDTH="91" VPOS="304" HPOS="272"/><SP WIDTH="8" VPOS="304" HPOS="364"/><String WC="0.95499998331069946" CONTENT="MDVN" HEIGHT="23" WIDTH="99" VPOS="304" HPOS="373"/><SP WIDTH="9" VPOS="304" HPOS="473"/><String WC="0.59166663885116577" CONTENT="shares" HEIGHT="25" WIDTH="79" VPOS="303" HPOS="483"/><SP WIDTH="10" VPOS="304" HPOS="563"/><String WC="0.75499999523162842" CONTENT="in" HEIGHT="22" WIDTH="25" VPOS="304" HPOS="574"/><SP WIDTH="7" VPOS="304" HPOS="600"/><String WC="0.90799999237060547" CONTENT="light" HEIGHT="30" WIDTH="59" VPOS="304" HPOS="608"/><SP WIDTH="8" VPOS="307" HPOS="668"/><String WC="1" CONTENT="of" HEIGHT="24" WIDTH="29" VPOS="303" HPOS="677"/><SP WIDTH="5" VPOS="303" HPOS="707"/><String WC="0.92000001668930054" CONTENT="today’s" HEIGHT="29" WIDTH="95" VPOS="304" HPOS="713"/><SP WIDTH="8" VPOS="310" HPOS="809"/><String WC="0.84666669368743896" CONTENT="pullback," HEIGHT="30" WIDTH="119" VPOS="304" HPOS="818"/><SP WIDTH="10" VPOS="310" HPOS="938"/><String WC="0.76499998569488525" CONTENT="as" HEIGHT="17" WIDTH="26" VPOS="310" HPOS="949"/><SP WIDTH="8" VPOS="310" HPOS="976"/><String WC="0.92000001668930054" CONTENT="we" HEIGHT="17" WIDTH="37" VPOS="310" HPOS="985"/><SP WIDTH="9" VPOS="310" HPOS="1023"/><String WC="0.76999998092651367" CONTENT="continue" HEIGHT="23" WIDTH="110" VPOS="304" HPOS="1033"/><SP WIDTH="9" VPOS="307" HPOS="1144"/><String WC="0.94499999284744263" CONTENT="to" HEIGHT="20" WIDTH="24" VPOS="307" HPOS="1154"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1153" VPOS="340" HPOS="25"><String WC="0.86000001430511475" CONTENT="expect" HEIGHT="27" WIDTH="84" VPOS="343" HPOS="25"/><SP WIDTH="11" VPOS="343" HPOS="110"/><String WC="0.79714286327362061" CONTENT="minimal" HEIGHT="23" WIDTH="107" VPOS="340" HPOS="122"/><SP WIDTH="12" VPOS="340" HPOS="230"/><String WC="0.86333334445953369" CONTENT="impact" HEIGHT="30" WIDTH="88" VPOS="340" HPOS="243"/><SP WIDTH="10" VPOS="343" HPOS="332"/><String WC="1" CONTENT="of" HEIGHT="23" WIDTH="29" VPOS="340" HPOS="343"/><SP WIDTH="9" VPOS="340" HPOS="373"/><String WC="0.9471428394317627" CONTENT="ARN-509" HEIGHT="23" WIDTH="127" VPOS="340" HPOS="383"/><SP WIDTH="12" VPOS="340" HPOS="511"/><String WC="0.83499997854232788" CONTENT="on" HEIGHT="16" WIDTH="32" VPOS="347" HPOS="524"/><SP WIDTH="9" VPOS="340" HPOS="557"/><String WC="0.90833336114883423" CONTENT="Xtandi" HEIGHT="23" WIDTH="88" VPOS="340" HPOS="567"/><SP WIDTH="12" VPOS="340" HPOS="656"/><String WC="0.87999999523162842" CONTENT="in" HEIGHT="23" WIDTH="25" VPOS="340" HPOS="669"/><SP WIDTH="9" VPOS="343" HPOS="695"/><String WC="0.82333332300186157" CONTENT="the" HEIGHT="23" WIDTH="39" VPOS="340" HPOS="705"/><SP WIDTH="12" VPOS="347" HPOS="745"/><String WC="0.80363637208938599" CONTENT="marketplace" HEIGHT="30" WIDTH="158" VPOS="340" HPOS="758"/><SP WIDTH="11" VPOS="340" HPOS="917"/><String WC="0.83428573608398438" CONTENT="because" HEIGHT="23" WIDTH="102" VPOS="340" HPOS="929"/><SP WIDTH="11" VPOS="346" HPOS="1032"/><String WC="1" CONTENT="of" HEIGHT="23" WIDTH="30" VPOS="340" HPOS="1044"/><SP WIDTH="8" VPOS="340" HPOS="1075"/><String WC="0.6912500262260437" CONTENT="clinical" HEIGHT="23" WIDTH="94" VPOS="340" HPOS="1084"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1154" VPOS="375" HPOS="25"><String WC="0.87666666507720947" CONTENT="and" HEIGHT="24" WIDTH="45" VPOS="375" HPOS="25"/><SP WIDTH="13" VPOS="375" HPOS="71"/><String WC="0.76800000667572021" CONTENT="regulatory" HEIGHT="30" WIDTH="128" VPOS="375" HPOS="85"/><SP WIDTH="16" VPOS="382" HPOS="214"/><String WC="0.88499999046325684" CONTENT="challenges" HEIGHT="30" WIDTH="136" VPOS="375" HPOS="231"/><SP WIDTH="12" VPOS="375" HPOS="368"/><String WC="0.90333330631256104" CONTENT="facing" HEIGHT="30" WIDTH="80" VPOS="375" HPOS="381"/><SP WIDTH="13" VPOS="375" HPOS="462"/><String WC="0.94999998807907104" CONTENT="ARN-509" HEIGHT="23" WIDTH="126" VPOS="375" HPOS="476"/><SP WIDTH="12" VPOS="375" HPOS="603"/><String WC="0.57333332300186157" CONTENT="and" HEIGHT="24" WIDTH="46" VPOS="375" HPOS="616"/><SP WIDTH="11" VPOS="375" HPOS="663"/><String WC="0.79874998331069946" CONTENT="Xtandi’s" HEIGHT="24" WIDTH="112" VPOS="375" HPOS="675"/><SP WIDTH="11" VPOS="375" HPOS="788"/><String WC="0.79600000381469727" CONTENT="first" HEIGHT="24" WIDTH="52" VPOS="375" HPOS="800"/><SP WIDTH="11" VPOS="378" HPOS="853"/><String WC="0.89800000190734863" CONTENT="mover" HEIGHT="17" WIDTH="84" VPOS="382" HPOS="865"/><SP WIDTH="9" VPOS="382" HPOS="950"/><String WC="0.79111111164093018" CONTENT="advantage" HEIGHT="30" WIDTH="132" VPOS="375" HPOS="960"/><SP WIDTH="12" VPOS="382" HPOS="1093"/><String WC="1" CONTENT="of" HEIGHT="24" WIDTH="29" VPOS="375" HPOS="1106"/><SP WIDTH="9" VPOS="375" HPOS="1136"/><String WC="0.90499997138977051" CONTENT="5+" HEIGHT="22" WIDTH="33" VPOS="376" HPOS="1146"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1154" VPOS="411" HPOS="24"><String WC="0.93599998950958252" CONTENT="years" HEIGHT="23" WIDTH="68" VPOS="418" HPOS="24"/><SP WIDTH="8" VPOS="418" HPOS="93"/><String WC="0.9100000262260437" CONTENT="over" HEIGHT="17" WIDTH="59" VPOS="418" HPOS="102"/><SP WIDTH="6" VPOS="411" HPOS="162"/><String WC="1" CONTENT="ARN-509" HEIGHT="23" WIDTH="127" VPOS="411" HPOS="169"/><SP WIDTH="8" VPOS="411" HPOS="297"/><String WC="0.42500001192092896" CONTENT="in" HEIGHT="23" WIDTH="24" VPOS="411" HPOS="306"/><SP WIDTH="7" VPOS="418" HPOS="331"/><String WC="0.9407692551612854" CONTENT="earlier-stage" HEIGHT="30" WIDTH="161" VPOS="411" HPOS="339"/><SP WIDTH="7" VPOS="418" HPOS="501"/><String WC="0.82249999046325684" CONTENT="prostate" HEIGHT="27" WIDTH="102" VPOS="414" HPOS="509"/><SP WIDTH="9" VPOS="418" HPOS="612"/><String WC="0.8162500262260437" CONTENT="cancers." HEIGHT="17" WIDTH="103" VPOS="418" HPOS="622"/><SP WIDTH="9" VPOS="412" HPOS="726"/><String WC="0.89499998092651367" CONTENT="Next" HEIGHT="23" WIDTH="63" VPOS="412" HPOS="736"/><SP WIDTH="7" VPOS="415" HPOS="800"/><String WC="0.84555554389953613" CONTENT="catalysts" HEIGHT="30" WIDTH="111" VPOS="411" HPOS="808"/><SP WIDTH="9" VPOS="411" HPOS="920"/><String WC="1" CONTENT="for" HEIGHT="24" WIDTH="37" VPOS="411" HPOS="930"/><SP WIDTH="6" VPOS="411" HPOS="968"/><String WC="1" CONTENT="MDVN" HEIGHT="24" WIDTH="99" VPOS="411" HPOS="975"/><SP WIDTH="8" VPOS="411" HPOS="1075"/><String WC="0.73571425676345825" CONTENT="include" HEIGHT="24" WIDTH="94" VPOS="411" HPOS="1084"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="1152" VPOS="447" HPOS="25"><String WC="0.88333332538604736" CONTENT="quarterly" HEIGHT="30" WIDTH="115" VPOS="447" HPOS="25"/><SP WIDTH="15" VPOS="454" HPOS="141"/><String WC="0.85124999284744263" CONTENT="earnings" HEIGHT="30" WIDTH="109" VPOS="447" HPOS="157"/><SP WIDTH="14" VPOS="447" HPOS="267"/><String WC="0.69499999284744263" CONTENT="in" HEIGHT="23" WIDTH="24" VPOS="447" HPOS="282"/><SP WIDTH="13" VPOS="448" HPOS="307"/><String WC="0.89399999380111694" CONTENT="July," HEIGHT="29" WIDTH="61" VPOS="448" HPOS="321"/><SP WIDTH="15" VPOS="447" HPOS="383"/><String WC="0.82714283466339111" CONTENT="interim" HEIGHT="24" WIDTH="93" VPOS="447" HPOS="399"/><SP WIDTH="13" VPOS="447" HPOS="493"/><String WC="0.94249999523162842" CONTENT="data" HEIGHT="24" WIDTH="53" VPOS="447" HPOS="507"/><SP WIDTH="14" VPOS="447" HPOS="561"/><String WC="1" CONTENT="from" HEIGHT="24" WIDTH="62" VPOS="447" HPOS="576"/><SP WIDTH="14" VPOS="448" HPOS="639"/><String WC="0.74285715818405151" CONTENT="PREVAIL" HEIGHT="22" WIDTH="135" VPOS="448" HPOS="654"/><SP WIDTH="15" VPOS="448" HPOS="790"/><String WC="0.87666666507720947" CONTENT="and" HEIGHT="23" WIDTH="45" VPOS="448" HPOS="806"/><SP WIDTH="14" VPOS="448" HPOS="852"/><String WC="0.92571431398391724" CONTENT="TERRAIN" HEIGHT="22" WIDTH="141" VPOS="448" HPOS="867"/><SP WIDTH="13" VPOS="447" HPOS="1009"/><String WC="1" CONTENT="data" HEIGHT="24" WIDTH="53" VPOS="447" HPOS="1023"/><SP WIDTH="15" VPOS="447" HPOS="1077"/><String WC="0.74000000953674316" CONTENT="in" HEIGHT="23" WIDTH="23" VPOS="447" HPOS="1093"/><SP WIDTH="14" VPOS="448" HPOS="1117"/><String WC="0.69333332777023315" CONTENT="2H," HEIGHT="27" WIDTH="45" VPOS="448" HPOS="1132"/></TextLine>
+<TextLine HEIGHT="30" WIDTH="892" VPOS="483" HPOS="25"><String WC="0.88625001907348633" CONTENT="followed" HEIGHT="24" WIDTH="115" VPOS="483" HPOS="25"/><SP WIDTH="6" VPOS="483" HPOS="141"/><String WC="1" CONTENT="by" HEIGHT="30" WIDTH="31" VPOS="483" HPOS="148"/><SP WIDTH="8" VPOS="490" HPOS="180"/><String WC="0.40999999642372131" CONTENT="a" HEIGHT="17" WIDTH="13" VPOS="490" HPOS="189"/><SP WIDTH="8" VPOS="490" HPOS="203"/><String WC="0.76111114025115967" CONTENT="potential" HEIGHT="30" WIDTH="113" VPOS="483" HPOS="212"/><SP WIDTH="8" VPOS="483" HPOS="326"/><String WC="0.87000000476837158" CONTENT="EU" HEIGHT="23" WIDTH="41" VPOS="484" HPOS="335"/><SP WIDTH="9" VPOS="484" HPOS="377"/><String WC="0.81499999761581421" CONTENT="Xtandi" HEIGHT="24" WIDTH="86" VPOS="483" HPOS="387"/><SP WIDTH="9" VPOS="483" HPOS="474"/><String WC="0.80374997854232788" CONTENT="approval" HEIGHT="30" WIDTH="113" VPOS="483" HPOS="484"/><SP WIDTH="8" VPOS="483" HPOS="598"/><String WC="0.68999999761581421" CONTENT="in" HEIGHT="23" WIDTH="23" VPOS="483" HPOS="607"/><SP WIDTH="8" VPOS="490" HPOS="631"/><String WC="1" CONTENT="mCRPC" HEIGHT="24" WIDTH="108" VPOS="483" HPOS="640"/><SP WIDTH="6" VPOS="483" HPOS="749"/><String WC="0.95499998331069946" CONTENT="by" HEIGHT="30" WIDTH="31" VPOS="483" HPOS="756"/><SP WIDTH="8" VPOS="491" HPOS="788"/><String WC="0.83888888359069824" CONTENT="year-end." HEIGHT="30" WIDTH="120" VPOS="483" HPOS="797"/></TextLine>
+</TextBlock>
+</PrintSpace>
+</Page>
+</Layout>
+</alto>
\ No newline at end of file
diff --git a/tests/module/exportAlto.spec.js b/tests/module/exportAlto.spec.js
new file mode 100644
index 0000000..730b058
--- /dev/null
+++ b/tests/module/exportAlto.spec.js
@@ -0,0 +1,187 @@
+// Relative imports are required to run in browser.
+/* eslint-disable import/no-relative-packages */
+import { assert, config } from '../../node_modules/chai/chai.js';
+import { writeAlto } from '../../js/export/writeAlto.js';
+import scribe from '../../scribe.js';
+import { ASSETS_PATH_KARMA } from '../constants.js';
+
+config.truncateThreshold = 0; // Disable truncation for actual/expected values on assertion failure.
+
+/**
+ * Load a text file as UTF-8, using `node:fs` when a `process` global exists and `fetch` otherwise.
+ * @param {string} filePath - Path handed to `readFile` (Node.js) or to `fetch` (browser).
+ * @returns {Promise<string>} The file contents.
+ * @throws {Error} If the fetch response is not OK.
+ */
+async function readTextFileUniversal(filePath) {
+  if (typeof process === 'undefined') {
+    const res = await fetch(filePath);
+    if (!res.ok) throw new Error(`Failed to fetch file: ${filePath}`);
+    return res.text();
+  }
+
+  // Imported lazily so that `node:fs` is only resolved on the Node.js branch.
+  const { promises: fs } = await import('node:fs');
+  return fs.readFile(filePath, 'utf-8');
+}
+
+/**
+ * Canonicalize an ALTO XML string so two documents can be compared on content alone.
+ * Drops processing metadata, content-free margin elements, PrintSpace attributes, TextBlock
+ * geometry and SP VPOS; rounds WC to 2 decimals; then emits one tag per line.
+ * @param {string} xmlStr
+ * @returns {string}
+ */
+const normalizeAlto = (xmlStr) => {
+  // Elements removed outright, listed in the order they are applied.
+  const removedElements = [
+    /<processingDateTime>[^<]*<\/processingDateTime>/g,
+    /<softwareCreator>[^<]*<\/softwareCreator>/g,
+    /<softwareName>[^<]*<\/softwareName>/g,
+    /<softwareVersion>[^<]*<\/softwareVersion>/g,
+    /<TopMargin[^>]*>\s*<\/TopMargin>/g,
+    /<LeftMargin[^>]*>\s*<\/LeftMargin>/g,
+    /<RightMargin[^>]*>\s*<\/RightMargin>/g,
+    /<BottomMargin[^>]*>\s*<\/BottomMargin>/g,
+  ];
+  // Position attributes stripped from TextBlock. Some of the test data is from Abbyy,
+  // which does not tightly enclose TextBlock elements around text content,
+  // so these attributes may differ after re-exporting.
+  const blockGeometry = [/\s*VPOS="[^"]*"\s*/g, /\s*HPOS="[^"]*"\s*/g, /\s*WIDTH="[^"]*"\s*/g, /\s*HEIGHT="[^"]*"\s*/g];
+  const spGeometry = [/\s*VPOS="[^"]*"\s*/g];
+
+  /** Replace each matching attribute with a single space, then trim the result. */
+  const stripAttrs = (attrs, patterns) => patterns.reduce((acc, re) => acc.replace(re, ' '), attrs).trim();
+
+  /** Re-print a confidence value with exactly 2 decimal places. */
+  const roundConfidence = (_match, value) => {
+    const numValue = parseFloat(value);
+    if (Number.isNaN(numValue)) {
+      throw new Error(`Invalid WC value: "${value}" cannot be parsed to a number`);
+    }
+    return `WC="${numValue.toFixed(2)}"`;
+  };
+
+  return removedElements
+    .reduce((acc, re) => acc.replace(re, ''), xmlStr)
+    .replace(/<PrintSpace[^>]*>/g, '<PrintSpace>')
+    .replace(/<TextBlock\s+([^>]*)>/g, (_match, attrs) => `<TextBlock ${stripAttrs(attrs, blockGeometry)}>`)
+    .replace(/WC="(\d+(?:\.\d+)?)"/g, roundConfidence)
+    .replace(/<SP\s+([^>]*)\/>/g, (_match, attrs) => `<SP ${stripAttrs(attrs, spGeometry)}/>`)
+    // Collapse all whitespace runs, then break the line after every tag.
+    .replace(/\s+/g, ' ')
+    .trim()
+    .replace(/>/g, '>\n');
+};
+
+// Using arrow functions breaks references to `this`.
+/* eslint-disable prefer-arrow-callback */
+/* eslint-disable func-names */
+
+describe('Check .alto export function.', function () {
+  this.timeout(10000);
+
+  // Fixtures exercised by the tests below.
+  const pastAltoPath = `${ASSETS_PATH_KARMA}/the_past.alto.xml`;
+  const simpleParagraphPath = `${ASSETS_PATH_KARMA}/simple_paragraph.alto.xml`;
+
+  /**
+   * Reset scribe and import a single fixture by path.
+   * @param {string} filePath
+   * @returns {Promise<void>}
+   */
+  const importFresh = async (filePath) => {
+    await scribe.terminate();
+    await scribe.importFiles([filePath]);
+  };
+
+  /**
+   * Export the active OCR data with `writeAlto`, reset scribe,
+   * then import the UTF-8 encoded export back in as an OCR file.
+   * @returns {Promise<void>}
+   */
+  const reimportAsAlto = async () => {
+    const altoOutStr = writeAlto({ ocrData: scribe.data.ocr.active });
+    const { buffer } = new TextEncoder().encode(altoOutStr);
+
+    await scribe.terminate();
+    await scribe.importFiles({ ocrFiles: [buffer] });
+  };
+
+  /**
+   * Text of one line on the first page, with words joined by single spaces.
+   * @param {number} lineIndex
+   * @returns {string}
+   */
+  const lineText = (lineIndex) => scribe.data.ocr.active[0].lines[lineIndex].words.map((x) => x.text).join(' ');
+
+  /**
+   * A word from the first line of the first page.
+   * @param {number} wordIndex
+   */
+  const firstLineWord = (wordIndex) => scribe.data.ocr.active[0].lines[0].words[wordIndex];
+
+  it('Should correctly export and reimport text content', async () => {
+    await importFresh(pastAltoPath);
+
+    const text1Before = lineText(0);
+    const text3Before = lineText(2);
+
+    await reimportAsAlto();
+
+    assert.strictEqual(text1Before, lineText(0));
+    assert.strictEqual(text3Before, lineText(2));
+  }).timeout(10000);
+
+  it('Should correctly export and reimport confidence scores', async () => {
+    await importFresh(pastAltoPath);
+
+    const conf1Before = firstLineWord(0).conf;
+    const conf2Before = firstLineWord(1).conf;
+
+    await reimportAsAlto();
+
+    assert.approximately(firstLineWord(0).conf, conf1Before, 1, 'Word 1 confidence should be approximately the same');
+    assert.approximately(firstLineWord(1).conf, conf2Before, 1, 'Word 2 confidence should be approximately the same');
+  }).timeout(10000);
+
+  it('Should correctly export and reimport font styles', async () => {
+    await importFresh(pastAltoPath);
+
+    const boldBefore1 = firstLineWord(0).style.bold;
+    const boldBefore2 = firstLineWord(1).style.bold;
+
+    await reimportAsAlto();
+
+    assert.strictEqual(boldBefore1, firstLineWord(0).style.bold, 'Word 1 bold style should be preserved');
+    assert.strictEqual(boldBefore2, firstLineWord(1).style.bold, 'Word 2 bold style should be preserved');
+  }).timeout(10000);
+
+  it('Should correctly export and reimport font family', async () => {
+    await importFresh(pastAltoPath);
+
+    const fontBefore = firstLineWord(0).style.font;
+
+    await reimportAsAlto();
+
+    assert.strictEqual(fontBefore, firstLineWord(0).style.font, 'Font family should be preserved');
+  }).timeout(10000);
+
+  it('Should match original ALTO XML structure after round-trip (content-only comparison)', async () => {
+    await importFresh(simpleParagraphPath);
+
+    const originalAltoStr = await readTextFileUniversal(simpleParagraphPath);
+    const altoOutStr = writeAlto({ ocrData: scribe.data.ocr.active });
+
+    // Both documents pass through the same normalization before being compared.
+    const normalizedOriginal = normalizeAlto(originalAltoStr);
+    const normalizedExported = normalizeAlto(altoOutStr);
+
+    assert.strictEqual(normalizedExported, normalizedOriginal, 'Exported ALTO should match original after normalization');
+  }).timeout(10000);
+
+  // Reset scribe once every test in this suite has run.
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);
diff --git a/tests/module/importDocx.spec.js b/tests/module/importDocx.spec.js
new file mode 100644
index 0000000..d9ee7d3
--- /dev/null
+++ b/tests/module/importDocx.spec.js
@@ -0,0 +1,240 @@
+// Relative imports are required to run in browser.
+/* eslint-disable import/no-relative-packages */
+import { assert, config } from '../../node_modules/chai/chai.js';
+import scribe from '../../scribe.js';
+import { ASSETS_PATH_KARMA } from '../constants.js';
+
+config.truncateThreshold = 0; // Disable truncation for actual/expected values on assertion failure.
+
+// Using arrow functions breaks references to `this`.
+/* eslint-disable prefer-arrow-callback */
+/* eslint-disable func-names */
+
+// Run these tests only in the browser or on Node.js 20.x and later, where the native File class is available.
+// While the library should be compatible with earlier versions of Node.js,
+// getting every test to run on versions that are already EOL is not a priority.
+const itSkipNodeEOL = typeof process === 'undefined' || Number.parseInt(process.versions.node.split('.')[0], 10) >= 20 ? it : xit;
+
+describe('Check docx import function.', function () {
+  this.timeout(10000);
+
+  // The import performed here is what the two tests below inspect; they import nothing themselves.
+  itSkipNodeEOL('Should import docx file', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/testocr.abbyy.xml`]);
+    const docxData = await scribe.exportData('docx');
+    await scribe.terminate();
+
+    const docxType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
+    await scribe.importFiles([new File([docxData], 'test.docx', { type: docxType })]);
+  });
+
+  itSkipNodeEOL('Should correctly import text content from docx', async () => {
+    const firstPage = scribe.data.ocr.active[0];
+    const text1 = firstPage.lines[0].words.map((x) => x.text).join(' ');
+    assert.include(text1, 'This is a lot of 12 point text');
+  }).timeout(10000);
+
+  itSkipNodeEOL('Should correctly import paragraphs from docx', async () => {
+    const firstPage = scribe.data.ocr.active[0];
+    assert.isTrue(firstPage.lines.length > 0);
+    assert.isTrue(firstPage.pars.length > 0);
+  }).timeout(10000);
+
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);
+
+describe('Check export -> import round-trip for docx files.', function () {
+  this.timeout(10000);
+
+  itSkipNodeEOL('Exporting and importing docx should preserve text content', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/testocr.abbyy.xml`]);
+
+    const docxData = await scribe.exportData('docx');
+    const docxFile = new File([docxData], 'test.docx', { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });
+
+    await scribe.terminate();
+    await scribe.importFiles([docxFile]);
+
+    // Words are joined by spaces, lines by newlines, and pages by blank lines.
+    const importedText = scribe.data.ocr.active.map((page) => page.lines.map((line) => line.words.map((word) => word.text).join(' ')).join('\n')).join('\n\n');
+
+    // Spot-check two phrases in the re-imported text; the full source text is not compared.
+    assert.include(importedText, 'This is a lot of 12 point text');
+    assert.include(importedText, 'The quick brown dog jumped');
+  }).timeout(10000);
+
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);
+
+describe('Check that font styles are preserved in docx round-trip.', function () {
+  this.timeout(10000);
+
+  // MIME type given to the exported docx `File`.
+  const docxMimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
+
+  /**
+   * Export the active data to docx, reset scribe, then import the exported file.
+   * @returns {Promise<void>}
+   */
+  const roundTripDocx = async () => {
+    const docxData = await scribe.exportData('docx');
+    const docxFile = new File([docxData], 'test.docx', { type: docxMimeType });
+
+    await scribe.terminate();
+    await scribe.importFiles([docxFile]);
+  };
+
+  /**
+   * Report whether at least one word, on any page, has a truthy `style[flag]`.
+   * @param {'bold'|'italic'} flag
+   * @returns {boolean}
+   */
+  const anyWordHasStyle = (flag) => {
+    for (const page of scribe.data.ocr.active) {
+      for (const line of page.lines) {
+        if (line.words.some((word) => word.style[flag])) return true;
+      }
+    }
+    return false;
+  };
+
+  itSkipNodeEOL('Bold style is preserved in round-trip', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/complaint_1.abbyy.xml`]);
+
+    // Precondition: this source word must be bold before exporting.
+    const originalBoldWord = scribe.data.ocr.active[1].lines[3].words[0];
+    assert.isTrue(originalBoldWord.style.bold);
+
+    await roundTripDocx();
+
+    // Word positions are not compared after re-import; any bold word counts.
+    const foundBoldWord = anyWordHasStyle('bold');
+    assert.isTrue(foundBoldWord, 'Should have at least one bold word after round-trip');
+  }).timeout(10000);
+
+  itSkipNodeEOL('Italic style is preserved in round-trip', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/E.D.Mich._2_12-cv-13821-AC-DRG_1_0.xml`]);
+
+    // Precondition: this source word must be italic before exporting.
+    const originalItalicWord = scribe.data.ocr.active[0].lines[30].words[0];
+    assert.isTrue(originalItalicWord.style.italic);
+
+    await roundTripDocx();
+
+    // Word positions are not compared after re-import; any italic word counts.
+    const foundItalicWord = anyWordHasStyle('italic');
+    assert.isTrue(foundItalicWord, 'Should have at least one italic word after round-trip');
+  }).timeout(10000);
+
+  // Reset scribe once both tests have run.
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);
+
+describe('Check that small caps are preserved in docx round-trip.', function () {
+  this.timeout(10000);
+
+  /**
+   * Report whether at least one word, on any page, has a truthy `style.smallCaps`.
+   * @returns {boolean}
+   */
+  const anyWordHasSmallCaps = () => scribe.data.ocr.active.some(
+    (page) => page.lines.some(
+      (line) => line.words.some((word) => word.style.smallCaps),
+    ),
+  );
+
+  itSkipNodeEOL('Small caps style is preserved in round-trip', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/econometrica_example.abbyy.xml`]);
+
+    // NOTE(review): unlike the bold/italic round-trip tests, no source word is asserted to be
+    // small caps before exporting (page 0, line 4, word 0 looks like the intended candidate;
+    // confirm against the fixture before adding that precondition).
+
+    const docxData = await scribe.exportData('docx');
+    const docxFile = new File([docxData], 'test.docx', { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });
+
+    await scribe.terminate();
+    await scribe.importFiles([docxFile]);
+
+    // Word positions are not compared after re-import; any small caps word counts.
+    const foundSmallCapsWord = anyWordHasSmallCaps();
+
+    assert.isTrue(foundSmallCapsWord, 'Should have at least one small caps word after round-trip');
+  }).timeout(10000);
+
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);
+
+describe('Check multi-page docx import.', function () {
+  this.timeout(10000);
+
+  itSkipNodeEOL('Should correctly handle multi-page documents', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/CSF_Proposed_Budget_Book_June_2024_r8_30_all_orientations.abbyy.xml`]);
+
+    const docxData = await scribe.exportData('docx');
+    const docxFile = new File([docxData], 'test.docx', { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });
+
+    await scribe.terminate();
+    await scribe.importFiles([docxFile]);
+
+    assert.isTrue(scribe.data.ocr.active.length > 0);
+
+    // Only the first page is required to contain lines; for later pages the check always passes.
+    // The page count itself is not compared with the source document.
+    for (const [pageIndex, page] of scribe.data.ocr.active.entries()) {
+      assert.isTrue(page.lines.length > 0 || pageIndex > 0);
+    }
+  }).timeout(20000);
+
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);
+
+describe('Check that font families are preserved in docx round-trip.', function () {
+  this.timeout(10000);
+
+  /**
+   * Find the first word, in page -> line -> word order, with a truthy `style.font`.
+   * @returns The matching word, or `undefined` when no word has a font.
+   */
+  const findFirstWordWithFont = () => {
+    for (const page of scribe.data.ocr.active) {
+      for (const line of page.lines) {
+        const match = line.words.find((word) => word.style.font);
+        if (match) return match;
+      }
+    }
+    return undefined;
+  };
+
+  itSkipNodeEOL('Font family is preserved in round-trip', async () => {
+    await scribe.importFiles([`${ASSETS_PATH_KARMA}/testocr.abbyy.xml`]);
+
+    const { font: originalFont } = scribe.data.ocr.active[0].lines[0].words[0].style;
+    assert.isNotNull(originalFont, 'Original word should have a font');
+    assert.isString(originalFont, 'Font should be a string');
+
+    const docxData = await scribe.exportData('docx');
+    const docxFile = new File([docxData], 'test.docx', { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });
+
+    await scribe.terminate();
+    await scribe.importFiles([docxFile]);
+
+    const firstFontWord = findFirstWordWithFont();
+    assert.isTrue(firstFontWord !== undefined, 'Should have at least one word with font family after round-trip');
+    assert.strictEqual(firstFontWord.style.font, originalFont, `Font should be preserved as "${originalFont}"`);
+  }).timeout(10000);
+
+  after(async () => {
+    await scribe.terminate();
+  });
+}).timeout(120000);