import { search } from "fast-fuzzy";
import { XMLParser } from "fast-xml-parser";
import { readCompressed } from "./compression";
import { buildTableXml } from "./ooxml/build/table";
import { buildTocRefXml } from "./ooxml/build/tocRef";
import { buildXeXml } from "./ooxml/build/xe";
import { buildCommentXml } from "./ooxml/build/comment";
import { buildAlignmentXml } from "./ooxml/build/alignment";
/**
 * Converts a Word OOXML document into a lightweight text markup and back.
 *
 * Markup tokens produced by the constructor and consumed by `getXmlBody`:
 * `::style(name)`, `::align<val>`, `::indent(n)`, `::li(level,numId)`, `::tab`,
 * `CMNT(text, id)`, `XE(term)`, `XA(ref)`, `_Ref<digits>(label)` and
 * `*` / `**` / `***` for italic / bold / highlight.
 */
export class OOXml {
    /**
     * @param {string} [xml] Either OOXML containing `<w:body>` (parsed into markup text),
     *   or any other string, which is stored unchanged as the markup text.
     *   A falsy value yields an empty document.
     * @param {{ignoreMultipleFontSizes?: boolean}} [options] When `ignoreMultipleFontSizes`
     *   is true, differing `<w:sz>` values are tolerated and the first one wins.
     * @throws {TypeError} If `xml` is truthy but not a string.
     * @throws {Error} "Multiple font sizes detected" when sizes differ and are not ignored.
     */
    constructor(xml, { ignoreMultipleFontSizes = false } = {}) {
        this.jObj = {};
        this.paragraphs = [];
        this.text = "";
        this.fontSize = 0;
        this.origXml = "";
        this.lineHeight = 0;
        this.suppressHyphenation = false;
        if (!xml) {
            return;
        }
        if (typeof xml !== "string") {
            throw new TypeError("xml must be string");
        }
        if (!xml.includes("<w:body>")) {
            // Not an OOXML body: treat the input as already-converted markup text.
            this.setContents(xml);
            return;
        }
        this.origXml = xml;
        // NOTE(review): this pattern requires a space before "/>" — confirm the producer emits that form.
        for (const [, rawSize] of xml.matchAll(/<w:sz w:val="(\d+)" \/>/g)) {
            const size = Number.parseInt(rawSize, 10);
            if (!this.fontSize || size === this.fontSize) {
                this.fontSize = size;
            }
            else if (!ignoreMultipleFontSizes) {
                throw new Error("Multiple font sizes detected");
            }
        }
        if (/<w:suppressAutoHyphens\/>/.test(xml)) {
            this.suppressHyphenation = true;
        }
        const parser = new XMLParser({ ignoreAttributes: false, preserveOrder: true, removeNSPrefix: true, trimValues: false });
        // Wrap tables in a paragraph/run so they are handled by the run renderer, and
        // unwrap structured-document-tag containers so their content is seen as body content.
        const normalizedXml = xml
            .replace(/<w:tbl>([\s\S]*?)<\/w:tbl>/g, "<w:p><w:r><w:tbl>$1</w:tbl></w:r></w:p>")
            .replace(/<w:sdt>[\s\S]{4,3000}?<w:sdtContent>/g, "")
            .replace(/<\/w:sdtContent>/g, "")
            .replace(/<\/w:sdt>/g, "")
            .replace(/&#xa0;/g, " ");
        this.jObj = parser.parse(normalizedXml);

        const hasText = (node) => node["#text"];

        // Renders a table as "|cell|cell|" rows between dashed separator lines.
        const renderTable = (tbl) => {
            const rows = tbl
                .filter((row) => row.tr)
                .map((row) => {
                    const cells = row.tr
                        .filter((cell) => cell.tc)
                        .map((cell) => cell.tc
                            .filter((block) => block.p)
                            .map((block) => block.p
                                .filter((part) => part.r)
                                // Only the first text node of each run is used; an empty <w:t> contributes nothing.
                                .map((part) => part.r.find((runChild) => runChild?.t)?.t.find(hasText)?.["#text"])
                                .join("")));
                    return "|" + cells.join("|") + "|";
                });
            return `\n---------------------\n` + rows.join("\n") + `\n---------------------\n`;
        };

        // Builds the markup prefix contributed by comment markers and paragraph properties.
        const paragraphPrefix = (child) => {
            let prefix = "";
            if (child.commentRangeStart) {
                prefix += " CMNT(";
            }
            if (child.commentRangeEnd) {
                prefix += ", " + child[":@"]["@_id"] + ") ";
            }
            if (child.pPr) {
                const style = child.pPr.find((prop) => prop.pStyle);
                if (style) {
                    prefix += "::style(" + style[":@"]["@_val"] + ") ";
                }
                const justification = child.pPr.find((prop) => prop.jc);
                if (justification) {
                    prefix += "::align" + justification[":@"]["@_val"] + " ";
                }
                const indent = child.pPr.find((prop) => prop.ind);
                if (indent) {
                    prefix += "::indent(" + indent[":@"]["@_left"] + ") ";
                }
                const numbering = child.pPr.find((prop) => prop.numPr);
                if (numbering) {
                    prefix += "::li(" + numbering.numPr.find((prop) => prop.ilvl)[":@"]["@_val"] + "," + (numbering.numPr.find((prop) => prop.numId)[":@"]["@_val"] ?? 0) + ") ";
                }
            }
            return prefix;
        };

        // Renders a simple field as "::nbsp<instruction>(<text>)".
        const renderSimpleField = (child) => {
            const labels = child.fldSimple
                // Run children without text (e.g. rPr) are skipped instead of crashing.
                .map((fieldChild) => fieldChild.r?.flatMap((runChild) => (runChild.t ?? []).map((textNode) => textNode["#text"])))
                .filter((texts) => texts)
                .map((texts) => texts.join(" ").trim());
            // Array-to-string coercion joins several labels with "," as before.
            return "::nbsp" + child[":@"]["@_instr"].trim().replace(/REF\s+/, "") + "(" + labels + ")";
        };

        const renderParagraphChild = (child) => {
            const prefix = paragraphPrefix(child);
            if (child.fldSimple) {
                return [renderSimpleField(child)];
            }
            // Emphasis markers collected from rPr nodes; they are applied to (and reset by)
            // the next run child that actually carries text.
            let pendingEmphasis = "";
            const renderRunChild = (runChild) => {
                if (runChild.tbl) {
                    return renderTable(runChild.tbl);
                }
                let rawText = runChild.t ? runChild.t.find(hasText)?.["#text"] : "";
                const instruction = runChild.instrText?.find(hasText);
                if (instruction) {
                    rawText = instruction["#text"];
                }
                if (runChild.rPr?.find((prop) => prop.i)) {
                    pendingEmphasis += "*";
                }
                if (runChild.rPr?.find((prop) => prop.b)) {
                    pendingEmphasis += "**";
                }
                if (runChild.rPr?.find((prop) => prop.highlight)) {
                    pendingEmphasis += "***";
                }
                let rendered = runChild.tab ? "::tab " : rawText;
                if (rawText && pendingEmphasis) {
                    rendered = pendingEmphasis + rendered + pendingEmphasis;
                    pendingEmphasis = "";
                }
                if (runChild[":@"]?.["@_space"] === "preserve") {
                    // Collapse surrounding whitespace to at most one leading/trailing space.
                    rendered = rendered?.trim();
                    if (rawText?.match(/^\s/)) {
                        rendered = " " + rendered;
                    }
                    if (rawText?.match(/\s$/) && rawText.length > 1) {
                        rendered += " ";
                    }
                }
                return rendered;
            };
            return [prefix, ...(child.r?.map(renderRunChild) ?? [""])];
        };

        const bodyNodes = deepSearchNode(this.jObj, (node) => node.body).body;
        // One entry per body node: an array of text pieces for paragraphs, [""] for
        // section properties, undefined for anything else (dropped below).
        this.paragraphs = bodyNodes.map((bodyNode) => {
            if (bodyNode.p) {
                return bodyNode.p
                    .flatMap(renderParagraphChild)
                    .filter((piece) => piece);
            }
            if (bodyNode.sectPr) {
                return [""];
            }
            return undefined;
        });
        // "|#|" temporarily marks piece boundaries so field/index patterns spanning
        // several runs can be recognised before the marker is removed again.
        const joined = this.paragraphs
            .filter(Boolean)
            .map((pieces) => pieces.join("|#|").replace(/\|#\|\s*REF (_Ref\d+)\s*.*?\|#\|+([^\|#]+?)\|#\|/g, "|#|$1($2)|#|"))
            .join("\n\n")
            .replace(/REF\s*(_Ref\d+)\s*(\\?\n)?(\|{2,4}|\s)((\.?\d)+)/g, "$1($4)")
            .replace(/„?(\|#\|)*([^\|]+)(\|#\|)*\s?XE "[^"]*"(\\f)?(\|#\|)*“?/g, "XE($2)")
            .replace(/(_Ref(\d+)\([^\)]+\))\s*TC\s*.{0,10}_Ref\2\([^\)]{0,10}\)/g, "XA($1)");
        this.text = joined
            .replace(/Ref(\d+\(.*?\))\|#\|([a-zA-ZöäüÖÄÜ])/g, "Ref$1 |#|$2")
            .replace(/\|#\|/g, "")
            .replace(/\s*CMNT\((.*?)\s+,\s?(\d+)\)\s+/g, " CMNT($1, $2) ")
            .replace(/\*\*\* \*\*\*/g, "")
            .replace(/\*\* \*\*/g, "")
            .replace(/\* \*/g, "")
            .replace(/::nbsp\s/g, " ")
            .replace(/\s::nbsp/g, " ")
            // Merge adjacent bold spans; run twice so chains of three spans collapse too.
            .replace(/\*{2}(.{1,100}?)\*{4}(.{1,100}?)\*{2}/g, "**$1$2**")
            .replace(/\*{2}(.{1,100}?)\*{4}(.{1,100}?)\*{2}/g, "**$1$2**");
    }
    /** @returns {string} The current markup text. */
    getContents() {
        return this.text;
    }
    /** @param {string} t Markup text that replaces the current contents. */
    setContents(t) {
        this.text = t;
    }
    /**
     * Finds the headline lines of the document and pairs them with list numbering info.
     *
     * Headings are lines starting with "§" (optionally preceded by a `::style(...)` tag);
     * if none exist, lines whose style name looks like a heading style are used instead.
     *
     * @param {Array<{text: string, listString?: string, level?: number}>} [linesWithListInfo]
     *   Lines with list numbering info; may be omitted, in which case numbering is
     *   derived from the heading text only.
     * @param {string[]} [text] Lines to scan; defaults to the current contents split on "\n".
     * @returns {Array<{text: string, i: number, listString?: string, level?: number}>}
     *   The candidate heading list with the fewest duplicated list strings.
     * @throws Whatever `findHeadlineStyles` throws when no headline is found.
     */
    getTOC(linesWithListInfo, text = this.text.split("\n")) {
        const listLines = linesWithListInfo ?? [];
        const indicesMatching = (regex) => text
            .map((line, index) => (line.match(regex) ? index : -1))
            .filter((index) => index > -1);
        let headingRegex = /^(::style.*?\))?§/;
        let headingLines = indicesMatching(headingRegex);
        if (headingLines.length === 0) {
            headingRegex = /^::style.*?(heading.*?|headline.*?|h\d|eslegal1|.*berschrift\d)\)/i;
            headingLines = indicesMatching(headingRegex);
        }
        const candidateLists = findHeadlineStyles(text, headingLines, headingRegex, listLines);
        const cleanedListLines = listLines.map(({ text: lineText, listString, level }) => ({ text: stripCustomTags(lineText).trim().replace(/[\* ]/g, ""), listString, level }));
        // Index the cleaned lines by text for O(1) exact lookups.
        const linesByText = new Map();
        for (const line of cleanedListLines) {
            if (!linesByText.has(line.text)) {
                linesByText.set(line.text, []);
            }
            linesByText.get(line.text).push(line);
        }
        // When headings are detected via "§", the sign itself stays part of the title.
        const headingReplacement = headingRegex.toString().includes("§") ? "§" : "";
        const headlineNumRegex = /^.{0,5}?(\d+\.?)+/;
        const scoredCandidates = candidateLists.map((candidates) => {
            const resolved = candidates.map(({ text: headingText, i }) => {
                let title = stripCustomTags(headingText.replace(headingRegex, headingReplacement)).trim();
                const titleKey = title.replace(/[\* ]/g, "");
                let matches = linesByText.get(titleKey) || [];
                if (matches.length === 0) {
                    const keyPrefix = titleKey.slice(0, 50);
                    matches = cleanedListLines.filter((line) => line.text.slice(0, 50) == keyPrefix);
                }
                if (matches.length === 0) {
                    // Fuzzy search is the expensive last resort.
                    matches = search(title, listLines, { keySelector: (obj) => obj.text, returnMatchData: true })
                        .filter((result) => result.score > .95)
                        .map((result) => result.item);
                    if (matches.length === 0) {
                        return { text: title, i, listString: undefined };
                    }
                    title = matches[0].text;
                }
                // Prefer the shallowest list level; sort a copy so the shared lookup arrays stay untouched.
                const matchedLine = matches.length > 1
                    ? [...matches].sort((a, b) => (a.level ?? 0) - (b.level ?? 0))[0]
                    : matches[0];
                let listString = matchedLine.listString;
                if (!listString) {
                    listString = title.match(headlineNumRegex)?.[0];
                    title = title.replace(headlineNumRegex, "").trim();
                }
                return { text: title, i, listString, level: matchedLine.level };
            });
            // Loose equality is kept on purpose: listString values come from the caller's data.
            const duplicates = resolved.filter((entry, index) => resolved.findIndex((other) => other.listString == entry.listString) != index);
            return { score: -duplicates.length, candidates: resolved };
        });
        return scoredCandidates.sort((a, b) => b.score - a.score)[0].candidates;
    }
    /**
     * Splits the document into sections, one per TOC heading. Sections whose content is
     * longer than 5000 characters are replaced by their own sub-sections when the content
     * contains headlines of its own.
     *
     * @param {Array<{text: string, listString?: string, level?: number}>} [t] List info passed
     *   on to `getTOC`; when given, each section also carries its `listString`.
     * @param {string[]} [textLines] Lines to split; defaults to the current contents.
     * @returns {Array<{text: string, content: string, listString?: string}>}
     */
    getSections(t, textLines = this.text.split("\n")) {
        const toc = this.getTOC(t, textLines);
        return toc.flatMap(({ text, i, listString }) => {
            // A section runs until the next heading with a larger line index, or to the end.
            const sectionEnd = toc.find((entry) => entry.i > i)?.i ?? textLines.length;
            const content = textLines.slice(i + 1, sectionEnd).join("\n");
            const section = t ? { text, content, listString } : { text, content };
            if (section.content.length > 5000) {
                try {
                    return this.getSections(t, textLines.slice(i + 1, sectionEnd));
                }
                catch (e) {
                    // No headlines inside: keep the oversized section as a whole.
                    if (e instanceof NoHeadlineFoundError)
                        return [section];
                    throw e;
                }
            }
            return [section];
        });
    }
    /**
     * Wraps the XML body for the given markup in a minimal flat-OPC package.
     *
     * @param {string} [textWithMarkup] Markup text; defaults to the current contents.
     * @returns {string} The package XML.
     */
    getXml(textWithMarkup = this.text) {
        return ("<pkg:package xmlns:pkg='http://schemas.microsoft.com/office/2006/xmlPackage'><pkg:part pkg:name='/_rels/.rels' pkg:contentType='application/vnd.openxmlformats-package.relationships+xml' pkg:padding='512'><pkg:xmlData><Relationships xmlns='http://schemas.openxmlformats.org/package/2006/relationships'><Relationship Id='rId1' Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument' Target='word/document.xml'/></Relationships></pkg:xmlData></pkg:part><pkg:part pkg:name='/word/document.xml' pkg:contentType='application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml'><pkg:xmlData><w:document xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' ><w:body>" +
            this.getXmlBody(textWithMarkup) +
            "</w:body></w:document></pkg:xmlData></pkg:part></pkg:package>");
    }
    /**
     * Converts markup text into the content of `<w:body>`: one `<w:p>` per blank-line
     * separated paragraph, with markup tokens expanded into runs, fields and properties.
     *
     * Only " & " (ampersand surrounded by spaces) is XML-escaped; other special
     * characters in the text are passed through unchanged.
     *
     * @param {string} textWithMarkup Markup text.
     * @returns {string} XML fragment, one trimmed non-empty line per output line.
     */
    getXmlBody(textWithMarkup) {
        const paragraphsXml = "<w:p>" +
            textWithMarkup
                .split(/\n\n/)
                .filter(Boolean)
                .map((paragraph) => '<w:r><w:t xml:space="preserve">' + paragraph + "</w:t></w:r>")
                .join("</w:p><w:p>") +
            "</w:p>";
        return paragraphsXml
            // NOTE(review): the "}n" after the bookmark name in w:instr looks like a mangled
            // field switch; it is reproduced unchanged here — confirm the intended instruction.
            .replace(/(Ziffer|Anlage)?\s*?(_Ref\d+)\(([^\)]+)\)(\s)?/g, (_match, keyword, bookmark, label, trailingSpace) => `</w:t></w:r>
        ` + (keyword ? `<w:r><w:t xml:space="preserve"> ${keyword}&#xa0;</w:t></w:r>` : "") + `
        <w:r><w:fldChar w:fldCharType="separate"/></w:r>
        <w:fldSimple w:instr=" REF  ${bookmark}}n">
        <w:r><w:t xml:space="preserve">${label + (trailingSpace ? " " : "")}</w:t></w:r>
        </w:fldSimple>
        <w:r><w:t xml:space="preserve">`)
            // Longest emphasis marker first, so "***" is not consumed as "**" + "*".
            .replace(/\*\*\*(.*?)\*\*\*/g, '</w:t></w:r><w:r><w:rPr><w:highlight w:val="yellow"/></w:rPr><w:t>$1</w:t></w:r><w:r><w:t>')
            .replace(/\*\*(.*?)\*\*/g, "</w:t></w:r><w:r><w:rPr><w:b /></w:rPr><w:t>$1</w:t></w:r><w:r><w:t>")
            .replace(/\*(.*?)\*/g, "</w:t></w:r><w:r><w:rPr><w:i /></w:rPr><w:t>$1</w:t></w:r><w:r><w:t>")
            // Consume both colons of "::tab" (a single-colon ":tab" is still accepted).
            .replace(/::?tab/g, "</w:t></w:r><w:r><w:tab /></w:r><w:r><w:t>")
            .replace(/\n?---------------------\n([\s\S]*?)\n---------------------\n?/g, buildTableXml)
            .replace(/<w:r>(<w:t[^>]*>)\s?(::style\((.*?)\))?(::alignright)?(::aligncenter)?(::alignboth)?(::indent\((\d+)\))?/g, (match, textOpenTag, styleTag, styleName, alignRight, alignCenter, alignBoth, indentTag, indentValue) => buildAlignmentXml([match, textOpenTag, styleTag, styleName, alignRight, alignCenter, alignBoth, indentTag, indentValue], this.suppressHyphenation, this.lineHeight))
            .replace(/ & /g, " &amp; ")
            .replace(/::li\((\d+)(,(\d+))?\)/g, '<w:numPr><w:ilvl w:val="$1" /><w:numId w:val="$3" /></w:numPr>')
            .replace(/XE\(([^\)]+)\)/g, buildXeXml)
            // Global flags: every TOC reference and comment is converted, not just the first.
            .replace(/XA\(([^\)]+)\)/g, buildTocRefXml)
            .replace(/CMNT\((.*?), (\d+)\)/g, buildCommentXml)
            // Drop runs and paragraphs left empty by the splitting above.
            .replace(/<w:r>\n?\s*<w:t>\s*<\/w:t>\n?\s*<\/w:r>/g, "")
            .replace(/<w:p><w:r><\/w:r><\/w:p>/g, "")
            .split("\n")
            .map((line) => line.trim())
            .filter((line) => line)
            .join("\n");
    }
    /**
     * Delegates to `readCompressed` from "./compression".
     *
     * @param {*} data Compressed input, passed through unchanged.
     * @returns {*} Whatever `readCompressed` returns.
     */
    static decompress(data) {
        return readCompressed(data);
    }
}
/**
 * Depth-first search for the first node that satisfies `predicate`.
 *
 * The node itself is tested first, then every object-valued property in
 * enumeration order. `null` values are skipped (note `typeof null === "object"`),
 * so the predicate is never invoked with `null`.
 *
 * @param {*} node Root of the object/array tree to search.
 * @param {(node: any) => any} predicate Returns a truthy value for the wanted node.
 * @returns {*} The first matching node, or `undefined` when nothing matches.
 */
function deepSearchNode(node, predicate) {
    if (node == null) {
        return undefined;
    }
    if (predicate(node)) {
        return node;
    }
    for (const key in node) {
        const child = node[key];
        if (child !== null && typeof child === "object") {
            const found = deepSearchNode(child, predicate);
            if (found) {
                return found;
            }
        }
    }
    return undefined;
}
/**
 * Removes the custom markup tokens from a line of text.
 *
 * `::tab` becomes a tab character; style, alignment, indent and list tags (and any
 * other `::name` token) are dropped; `_Ref<n>(label)` and `XE(term)` are reduced to
 * their inner text.
 *
 * @param {string | null | undefined} e Text with markup.
 * @returns {string | undefined} The cleaned text, or `undefined` for null/undefined input.
 */
export function stripCustomTags(e) {
    if (e == null) {
        return undefined;
    }
    // Applied strictly in this order: specific tags first, the catch-all "::" rule afterwards.
    const rules = [
        [/::tab/g, "\t"],
        [/::style\(.*?\)/g, ""],
        [/::align\w+/g, ""],
        [/::indent\((\d+)\)/g, ""],
        [/::li\((\d+)(,(\d+))?\)/g, ""],
        [/::[^ \)]+\)? ?/g, ""],
        [/_Ref(\d+)\(([^\)]+)\)/g, "$2"],
        [/XE\((.*?)\)/g, "$1"],
    ];
    let cleaned = e;
    for (const [pattern, replacement] of rules) {
        cleaned = cleaned.replace(pattern, replacement);
    }
    return cleaned;
}
/**
 * Groups the heading lines by the style captured in group 1 of `headingRegex` and
 * returns one candidate heading list per style, most frequent style first.
 *
 * Lines are assigned to a style by exact equality of the captured value, so a style
 * such as "Heading1" does not also collect the lines of "Heading10". Lines without a
 * captured style form their own group.
 *
 * A candidate is discarded when its first heading appears both after line 50 and
 * more than 20% into the document, since a primary headline style is unlikely to
 * start that late.
 *
 * @param {string[]} allLines All lines of the document.
 * @param {number[]} headingLines Indices into `allLines` of the heading lines.
 * @param {RegExp} headingRegex Heading pattern whose first capture group is the style.
 * @param {*} lineData Unused; kept for signature compatibility.
 * @returns {Array<Array<{text: string, i: number}>>} Candidate heading lists.
 * @throws {NoHeadlineFoundError} When no candidate remains.
 */
function findHeadlineStyles(allLines, headingLines, headingRegex, lineData) {
    // Match each heading line once and remember its style key.
    const headings = headingLines.map((i) => {
        const text = allLines[i];
        const style = String((text.match(headingRegex) ?? [])[1]);
        return { text, i, style };
    });
    const countsByStyle = new Map();
    for (const { style } of headings) {
        countsByStyle.set(style, (countsByStyle.get(style) ?? 0) + 1);
    }
    const stylesByFrequency = [...countsByStyle.entries()].sort((a, b) => b[1] - a[1]);
    const candidateLists = stylesByFrequency
        .map(([style]) => headings
            .filter((heading) => heading.style === style)
            .map(({ text, i }) => ({ text, i })))
        .filter((candidates) => {
            const firstLine = candidates[0].i;
            const relativePosition = firstLine / allLines.length;
            return !(relativePosition > 0.2 && firstLine > 50);
        });
    if (!candidateLists.length) {
        throw new NoHeadlineFoundError();
    }
    return candidateLists;
}
/**
 * Thrown when no headline candidate can be found in the scanned lines.
 */
class NoHeadlineFoundError extends Error {
    constructor() {
        super("no headline found");
        // Make logs and stack traces show the specific error type instead of "Error".
        this.name = "NoHeadlineFoundError";
    }
}
