// Language detection for: .ts — guessed language: Unknown {[0] [0] [0]} [method: centroid computation, simple weights, six dimensions]
import { describe, expect, it } from "vitest";
import { sanitizeForPlainText } from "./sanitize-text.js";
// ---------------------------------------------------------------------------
// sanitizeForPlainText
// ---------------------------------------------------------------------------
/**
 * Behavioural spec for `sanitizeForPlainText`.
 *
 * Each case feeds an HTML-flavoured string to the function and pins the exact
 * plain-text result: `<br>` variants become newlines, inline formatting tags
 * become WhatsApp-style markers (`*bold*`, `_italic_`, `~strike~`, backticks
 * for code), block elements become newline-delimited text, any other tag is
 * dropped, and text without tags is returned unchanged.
 */
describe("sanitizeForPlainText", () => {
  // --- line breaks --------------------------------------------------------
  it("converts <br> to newline", () => {
    expect(sanitizeForPlainText("hello<br>world")).toBe("hello\nworld");
  });

  it("converts self-closing <br/> and <br /> variants", () => {
    expect(sanitizeForPlainText("a<br/>b")).toBe("a\nb");
    expect(sanitizeForPlainText("a<br />b")).toBe("a\nb");
  });

  // --- inline formatting --------------------------------------------------
  it("converts <b> and <strong> to WhatsApp bold", () => {
    expect(sanitizeForPlainText("<b>bold</b>")).toBe("*bold*");
    expect(sanitizeForPlainText("<strong>bold</strong>")).toBe("*bold*");
  });

  it("converts <i> and <em> to WhatsApp italic", () => {
    expect(sanitizeForPlainText("<i>italic</i>")).toBe("_italic_");
    expect(sanitizeForPlainText("<em>italic</em>")).toBe("_italic_");
  });

  it("converts <s>, <strike>, and <del> to WhatsApp strikethrough", () => {
    expect(sanitizeForPlainText("<s>deleted</s>")).toBe("~deleted~");
    expect(sanitizeForPlainText("<del>removed</del>")).toBe("~removed~");
    expect(sanitizeForPlainText("<strike>old</strike>")).toBe("~old~");
  });

  it("converts <code> to backtick wrapping", () => {
    expect(sanitizeForPlainText("<code>foo()</code>")).toBe("`foo()`");
  });

  // --- block elements -----------------------------------------------------
  it("converts <p> and <div> to newlines", () => {
    expect(sanitizeForPlainText("<p>paragraph</p>")).toBe("\nparagraph\n");
  });

  it("converts headings to bold text with newlines", () => {
    expect(sanitizeForPlainText("<h1>Title</h1>")).toBe("\n*Title*\n");
    expect(sanitizeForPlainText("<h3>Section</h3>")).toBe("\n*Section*\n");
  });

  it("converts <li> to bullet points", () => {
    expect(sanitizeForPlainText("<li>item one</li><li>item two</li>")).toBe(
      "• item one\n• item two\n",
    );
  });

  // --- tag stripping ------------------------------------------------------
  it("strips unknown/remaining tags", () => {
    expect(sanitizeForPlainText('<span class="x">text</span>')).toBe("text");
    // The URL must stay on the same line as the opening quote: a raw line
    // break inside a quoted string literal is a syntax error.
    expect(sanitizeForPlainText('<a href="https://example.com">link</a>')).toBe("link");
  });

  it("preserves angle-bracket autolinks", () => {
    // The URL inside `<...>` is kept; only the surrounding brackets are dropped.
    expect(sanitizeForPlainText("See <https://example.com/path?q=1> now")).toBe(
      "See https://example.com/path?q=1 now",
    );
  });

  // --- passthrough --------------------------------------------------------
  it("passes through clean text unchanged", () => {
    expect(sanitizeForPlainText("hello world")).toBe("hello world");
  });

  it("does not corrupt angle brackets in prose", () => {
    // `a < b` does not match `<tag>` pattern because there is no closing `>`
    // immediately after a tag-like sequence.
    expect(sanitizeForPlainText("a < b && c > d")).toBe("a < b && c > d");
  });

  // --- mixed content ------------------------------------------------------
  it("handles mixed HTML content", () => {
    const input = "Hello<br><b>world</b> this is <i>nice</i>";
    expect(sanitizeForPlainText(input)).toBe("Hello\n*world* this is _nice_");
  });

  it("collapses excessive newlines", () => {
    // Four consecutive breaks are expected to collapse to a single blank line.
    expect(sanitizeForPlainText("a<br><br><br><br>b")).toBe("a\n\nb");
  });
});