// Shared helpers for parsing MEDIA tokens from command/stdout text.
import { parseFenceSpans } from "../markdown/fences.js"; import { parseAudioTag } from "./audio-tags.js";
/**
 * Matches a `MEDIA:` token (case-insensitive, on a word boundary) and captures
 * the text that follows it in group 1.
 *
 * Notes for callers:
 * - An optional opening backtick directly after `MEDIA:` is skipped.
 * - Group 1 is greedy up to the end of the line, so a closing backtick or
 *   trailing punctuation ends up INSIDE the capture; the trailing optional
 *   backtick in the pattern only ever matches the empty string. The capture
 *   therefore still needs cleanup by the caller.
 * - The whitespace run after the colon may include a newline, in which case
 *   the capture is taken from the following line.
 * - The regex carries the `g` flag, so `exec`/`test` advance `lastIndex` on
 *   this shared instance; reset it (or use `matchAll`/`replace`) between inputs.
 */
export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\n]+)`?/gi;
/**
 * Broad structural check: does this look like a local file path?
 *
 * Used only for stripping `MEDIA:` lines from output text — never for media
 * approval. Unlike `isLikelyLocalPath`, this intentionally also accepts `../`
 * and `~` prefixes, because the goal here is to recognise anything path-shaped
 * so it can be removed from visible text.
 *
 * @param candidate - Token text to classify.
 * @returns `true` when the candidate is shaped like a filesystem path.
 */
function looksLikeLocalFilePath(candidate: string): boolean {
  return (
    candidate.startsWith("/") ||
    candidate.startsWith("./") ||
    candidate.startsWith("../") ||
    candidate.startsWith("~") ||
    // NOTE(review): presumably matches a Windows drive-letter prefix — defined outside this view.
    WINDOWS_DRIVE_RE.test(candidate) ||
    // Two literal backslashes, i.e. a UNC-style prefix.
    candidate.startsWith("\\\\") ||
    // Anything containing a path separator, unless SCHEME_RE matches it
    // (presumably a URL scheme such as "https:" — confirm against its definition).
    (!SCHEME_RE.test(candidate) && (candidate.includes("/") || candidate.includes("\\")))
  );
}
/**
 * Recognize safe local file path patterns for media approval.
 *
 * Traversal and home-dir paths are rejected up front so they never reach
 * downstream load/send logic.
 *
 * @param candidate - Token text to classify.
 * @returns `true` when the candidate is an acceptable local path shape.
 */
function isLikelyLocalPath(candidate: string): boolean {
  // Must run first: the separator-based fallback below would otherwise accept
  // candidates such as "../x" simply because they contain a "/".
  if (hasTraversalOrHomeDirPrefix(candidate)) {
    return false;
  }
  return (
    candidate.startsWith("/") ||
    candidate.startsWith("./") ||
    // NOTE(review): presumably matches a Windows drive-letter prefix — defined outside this view.
    WINDOWS_DRIVE_RE.test(candidate) ||
    // Two literal backslashes, i.e. a UNC-style prefix.
    candidate.startsWith("\\\\") ||
    // Anything containing a path separator, unless SCHEME_RE matches it.
    (!SCHEME_RE.test(candidate) && (candidate.includes("/") || candidate.includes("\\")))
  );
}
/**
 * Decides whether a candidate string extracted from a `MEDIA:` token may be
 * treated as a media reference.
 *
 * Checks run in this order, first decision wins:
 * 1. empty or longer than 4096 characters → rejected
 * 2. contains whitespace and `opts.allowSpaces` is not set → rejected
 * 3. starts with `http://` or `https://` (case-insensitive) → accepted
 * 4. `isLikelyLocalPath` accepts it → accepted
 * 5. has a traversal/home-dir prefix → rejected
 * 6. `opts.allowBareFilename` is set, no scheme, and `HAS_FILE_EXT` matches → accepted
 * 7. otherwise → rejected
 *
 * @param candidate - Text to validate.
 * @param opts - `allowSpaces` permits whitespace inside the candidate;
 *   `allowBareFilename` opts in to accepting names such as `image.png`.
 * @returns `true` when the candidate is acceptable as media.
 */
function isValidMedia(
  candidate: string,
  opts?: { allowSpaces?: boolean; allowBareFilename?: boolean },
): boolean {
  if (!candidate) {
    return false;
  }
  if (candidate.length > 4096) {
    return false;
  }
  if (!opts?.allowSpaces && /\s/.test(candidate)) {
    return false;
  }
  if (/^https?:\/\//i.test(candidate)) {
    return true;
  }

  if (isLikelyLocalPath(candidate)) {
    return true;
  }

  // Hard reject traversal/home-dir patterns before the bare-filename fallback
  // to prevent path traversal bypasses (e.g. "../../.env" matching HAS_FILE_EXT).
  if (hasTraversalOrHomeDirPrefix(candidate)) {
    return false;
  }

  // Accept bare filenames (e.g. "image.png") only when the caller opts in.
  // This avoids treating space-split path fragments as separate media items.
  if (opts?.allowBareFilename && !SCHEME_RE.test(candidate) && HAS_FILE_EXT.test(candidate)) {
    return true;
  }

  return false;
}
/**
 * Removes one pair of matching quote characters wrapped around a value.
 *
 * The input is trimmed first. If what remains is at least two characters long
 * and begins and ends with the same quote character (double quote, single
 * quote, or backtick), the inner text is returned, trimmed again.
 *
 * @param value - Possibly quoted text.
 * @returns The unquoted inner text, or `undefined` when the value is not quoted.
 */
function unwrapQuoted(value: string): string | undefined {
  const candidate = value.trim();
  const opener = candidate.charAt(0);
  const isQuoteChar = opener === '"' || opener === "'" || opener === "`";
  // The length guard keeps a lone quote character from counting as both ends.
  const isWrapped = candidate.length >= 2 && isQuoteChar && candidate.endsWith(opener);
  return isWrapped ? candidate.slice(1, -1).trim() : undefined;
}
/**
 * Cheap pre-check for fenced code blocks: reports whether the input contains a
 * run of three backticks or three tildes anywhere.
 *
 * @param input - Text to scan.
 * @returns `true` when either fence marker occurs in the input.
 */
function mayContainFenceMarkers(input: string): boolean {
  const fenceMarkers = ["```", "~~~"];
  return fenceMarkers.some((marker) => input.includes(marker));
}
/**
 * Check if a character offset is inside any fenced code block.
 *
 * Each span is treated as the half-open interval `[start, end)`: `start` is
 * inside the fence, `end` is not.
 *
 * @param fenceSpans - Fence spans to test against; not mutated.
 * @param offset - Character offset to test.
 * @returns `true` when at least one span contains the offset.
 */
function isInsideFence(
  fenceSpans: ReadonlyArray<{ start: number; end: number }>,
  offset: number,
): boolean {
  return fenceSpans.some((span) => offset >= span.start && offset < span.end);
}
export function splitMediaFromOutput(raw: string): {
text: string;
mediaUrls?: string[];
mediaUrl?: string; // legacy first item for backward compatibility
audioAsVoice?: boolean; // true if [[audio_as_voice]] tag was found
segments?: ParsedMediaOutputSegment[];
} { // KNOWN: Leading whitespace is semantically meaningful in Markdown (lists, indented fences). // We only trim the end; token cleanup below handles removing `MEDIA:` lines. const trimmedRaw = raw.trimEnd(); if (!trimmedRaw.trim()) { return { text: "" };
} const mayContainMediaToken = /media:/i.test(trimmedRaw); const mayContainMarkdownImage = /!\[[^\]]*]\(/.test(trimmedRaw); const mayContainAudioTag = trimmedRaw.includes("[["); if (!mayContainMediaToken && !mayContainMarkdownImage && !mayContainAudioTag) { return { text: trimmedRaw };
}
const pushTextSegment = (text: string) => { if (!text) { return;
} const last = segments[segments.length - 1]; if (last?.type === "text") {
last.text = `${last.text}\n${text}`; return;
}
segments.push({ type: "text", text });
};
// Parse fenced code blocks to avoid extracting MEDIA tokens from inside them const hasFenceMarkers = mayContainFenceMarkers(trimmedRaw); const fenceSpans = hasFenceMarkers ? parseFenceSpans(trimmedRaw) : [];
// Collect tokens line by line so we can strip them cleanly. const lines = trimmedRaw.split("\n"); const keptLines: string[] = [];
let lineOffset = 0; // Track character offset for fence checking for (const line of lines) { // Skip MEDIA extraction if this line is inside a fenced code block if (hasFenceMarkers && isInsideFence(fenceSpans, lineOffset)) {
keptLines.push(line);
pushTextSegment(line);
lineOffset += line.length + 1; // +1 for newline continue;
}
const trimmedStart = line.trimStart(); if (!trimmedStart.toUpperCase().startsWith("MEDIA:")) { const markdownImageResult = collectMarkdownImageSegments({ line, media }); if (!markdownImageResult.foundMedia) {
keptLines.push(line);
pushTextSegment(line);
} else {
foundMediaToken = true; if (markdownImageResult.cleanedLine) {
keptLines.push(markdownImageResult.cleanedLine);
} for (const segment of markdownImageResult.lineSegments) { if (segment.type === "text") {
pushTextSegment(segment.text); continue;
}
segments.push(segment);
}
}
lineOffset += line.length + 1; // +1 for newline continue;
}
if (hasValidMedia) { const beforeText = cleanLineText(pieces.join("")); if (beforeText) {
lineSegments.push({ type: "text", text: beforeText });
}
pieces.length = 0; for (const url of media.slice(mediaStartIndex, mediaStartIndex + validCount)) {
lineSegments.push({ type: "media", url });
} if (invalidParts.length > 0) {
pieces.push(invalidParts.join(" "));
}
} elseif (looksLikeLocalPath) { // Strip MEDIA: lines with local paths even when invalid (e.g. absolute paths // from internal tools like TTS). They should never leak as visible text.
foundMediaToken = true;
} else { // If no valid media was found in this match, keep the original token text.
pieces.push(match[0]);
}
// If the line becomes empty, drop it. if (cleanedLine) {
keptLines.push(cleanedLine);
lineSegments.push({ type: "text", text: cleanedLine });
} for (const segment of lineSegments) { if (segment.type === "text") {
pushTextSegment(segment.text); continue;
}
segments.push(segment);
}
lineOffset += line.length + 1; // +1 for newline
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.