// Utilities for splitting outbound text into platform-sized chunks without
// unintentionally breaking on newlines. Using [\s\S] keeps newlines inside
// the chunk so messages are only split when they truly exceed the limit.
import type { ChannelId } from "../channels/plugins/types.core.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js"; import { resolveChannelStreamingChunkMode } from "../plugin-sdk/channel-streaming.js"; import { resolveAccountEntry } from "../routing/account-lookup.js"; import { normalizeAccountId } from "../routing/session-key.js"; import { chunkTextByBreakResolver } from "../shared/text-chunking.js"; import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
// Normalize to \n so blank line detection is consistent. const normalized = text.replace(/\r\n?/g, "\n");
// Fast-path: if there are no blank-line paragraph separators, do not split. // (We *do not* early-return based on `limit` — newline mode is about paragraph // boundaries, not only exceeding a length limit.) const paragraphRe = /\n[\t ]*\n+/; if (!paragraphRe.test(normalized)) { if (normalized.length <= limit) { return [normalized];
} if (!splitLongParagraphs) { return [normalized];
} return chunkText(normalized, limit);
}
const spans = parseFenceSpans(normalized);
const parts: string[] = []; const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace
let lastIndex = 0; for (const match of normalized.matchAll(re)) { const idx = match.index ?? 0;
// Do not split on blank lines that occur inside fenced code blocks. if (!isSafeFenceBreak(spans, idx)) { continue;
}
/**
 * Chunk markdown text according to the requested chunking mode.
 *
 * In `"newline"` mode the text is first divided by `chunkByParagraph` (with
 * `splitLongParagraphs` turned off), and every resulting paragraph is then
 * passed through `chunkMarkdownText` with the same limit. Any other mode goes
 * straight to `chunkMarkdownText`.
 *
 * @param text - Markdown text to split.
 * @param limit - Limit forwarded unchanged to the underlying chunkers.
 * @param mode - Chunking mode; only `"newline"` selects paragraph-first splitting.
 * @returns The resulting chunks, in their original order.
 */
export function chunkMarkdownTextWithMode(text: string, limit: number, mode: ChunkMode): string[] {
  if (mode !== "newline") {
    return chunkMarkdownText(text, limit);
  }

  // Paragraph boundaries are never arbitrary indices, so splitting there is
  // fence-safe. Length-based splitting of an oversized paragraph is left to
  // the markdown-aware chunker below.
  const paragraphs = chunkByParagraph(text, limit, { splitLongParagraphs: false });

  return paragraphs.flatMap((paragraph) => {
    const pieces = chunkMarkdownText(paragraph, limit);
    // Keep a non-empty paragraph even when the markdown chunker yields nothing for it.
    return pieces.length === 0 && paragraph ? [paragraph] : pieces;
  });
}
/**
 * Split text into segments at each `"\n"` that `isSafeBreak` accepts.
 *
 * Newlines rejected by the predicate stay inside the surrounding segment.
 * The accepted newline characters themselves are dropped from the output.
 *
 * @param text - Text to split.
 * @param isSafeBreak - Called with the index of every newline, in ascending
 *   order; return `false` to keep that newline unsplit. Defaults to accepting all.
 * @returns At least one segment; the final segment is whatever follows the
 *   last accepted newline (possibly the empty string).
 */
function splitByNewline(
  text: string,
  isSafeBreak: (index: number) => boolean = () => true,
): string[] {
  const segments: string[] = [];
  let segmentStart = 0;

  // Jump from newline to newline instead of inspecting every character.
  let newlineAt = text.indexOf("\n");
  while (newlineAt !== -1) {
    if (isSafeBreak(newlineAt)) {
      segments.push(text.slice(segmentStart, newlineAt));
      segmentStart = newlineAt + 1;
    }
    newlineAt = text.indexOf("\n", newlineAt + 1);
  }

  // Whatever remains after the last accepted break is always emitted.
  segments.push(text.slice(segmentStart));
  return segments;
}
/**
 * Decide whether chunking can be short-circuited.
 *
 * @param text - Text that would be chunked.
 * @param limit - Maximum chunk length.
 * @returns `[]` for empty text; `[text]` when `limit` is zero or negative, or
 *   when the text already fits within `limit`; otherwise `undefined`, meaning
 *   the caller has to perform real chunking.
 */
function resolveChunkEarlyReturn(text: string, limit: number): string[] | undefined {
  if (!text) {
    return [];
  }
  // A non-positive limit and a text that fits are both returned as a single chunk.
  const singleChunk = limit <= 0 || text.length <= limit;
  return singleChunk ? [text] : undefined;
}
/**
 * Split plain text into chunks using `chunkTextByBreakResolver`.
 *
 * Inputs handled by `resolveChunkEarlyReturn` (empty text, non-positive
 * limit, text that already fits) are returned without further work. For the
 * rest, each window is scanned with `scanParenAwareBreakpoints` and the break
 * position is chosen as follows: the last newline when its index is greater
 * than zero, otherwise the last whitespace position.
 *
 * @param text - Text to split.
 * @param limit - Maximum chunk length.
 * @returns The chunks produced for `text`.
 */
export function chunkText(text: string, limit: number): string[] {
  const shortcut = resolveChunkEarlyReturn(text, limit);
  if (shortcut !== undefined) {
    return shortcut;
  }

  return chunkTextByBreakResolver(text, limit, (segment) => {
    const breakpoints = scanParenAwareBreakpoints(segment, 0, segment.length);
    // First choice: a newline inside the window (outside parentheses).
    if (breakpoints.lastNewline > 0) {
      return breakpoints.lastNewline;
    }
    // Fallback: the last whitespace (word boundary) inside the window.
    return breakpoints.lastWhitespace;
  });
}
export function chunkMarkdownText(text: string, limit: number): string[] { const early = resolveChunkEarlyReturn(text, limit); if (early) { return early;
}
const chunks: string[] = []; const spans = parseFenceSpans(text);
let start = 0;
let reopenFence: ReturnType<typeof findFenceSpanAt> | undefined;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.