/**
* Sanitize model output for plain - text messaging surfaces .
*
* LLMs occasionally produce HTML tags ( ` < br > ` , ` < b > ` , ` < i > ` , etc . ) that render
* correctly on web but appear as literal text on WhatsApp , Signal , SMS , and IRC .
*
* Converts common inline HTML to lightweight - markup equivalents used by
* WhatsApp / Signal / Telegram and strips any remaining tags .
*
* @ see https : //github.com/openclaw/openclaw/issues/31884
* @ see https : //github.com/openclaw/openclaw/issues/18558
*/
/**
* Convert common HTML tags to their plain - text / lightweight - markup equivalents
* and strip anything that remains .
*
* The function is intentionally conservative — it only targets tags that models
* are known to produce and avoids false positives on angle brackets in normal
* prose ( e . g . ` a < b ` ) .
*/
export function sanitizeForPlainText(text: string): string {
return (
text
// Preserve angle-bracket autolinks as plain URLs before tag stripping.
.replace(/<((?:https?:\/\/|mailto:)[^<>\s]+)>/gi, "$1" )
// Line breaks
.replace(/<br\s*\/?>/gi, "\n" )
// Block elements → newlines
.replace(/<\/?(p|div)>/gi, "\n" )
// Bold → WhatsApp/Signal bold
.replace(/<(b|strong)>(.*?)<\/\1 >/gi, "*$2*" )
// Italic → WhatsApp/Signal italic
.replace(/<(i|em)>(.*?)<\/\1 >/gi, "_$2_" )
// Strikethrough → WhatsApp/Signal strikethrough
.replace(/<(s|strike|del)>(.*?)<\/\1 >/gi, "~$2~" )
// Inline code
.replace(/<code>(.*?)<\/code>/gi, "`$1`" )
// Headings → bold text with newline
.replace(/<h[1 -6 ][^>]*>(.*?)<\/h[1 -6 ]>/gi, "\n*$1*\n" )
// List items → bullet points
.replace(/<li[^>]*>(.*?)<\/li>/gi, "• $1\n" )
// Strip remaining HTML tags (require tag-like structure: <word...>)
.replace(/<\/?[a-z][a-z0-9 ]*\b[^>]*>/gi, "" )
// Collapse 3+ consecutive newlines into 2
.replace(/\n{3 ,}/g, "\n\n" )
);
}
Messung V0.5 in Prozent C=98 H=98 G=97
¤ Dauer der Verarbeitung: 0.18 Sekunden
(vorverarbeitet am 2026-06-10)
¤
*© Formatika GbR, Deutschland