import type { MediaUnderstandingOutput } from "./types.js" ;
const MEDIA_PLACEHOLDER_RE = /^<media:[^>]+>(\s*\([^)]*\))?$/i;
const MEDIA_PLACEHOLDER_TOKEN_RE = /^<media:[^>]+>(\s*\([^)]*\))?\s*/i;
export function extractMediaUserText(body?: string): string | undefined {
const trimmed = body?.trim() ?? "" ;
if (!trimmed) {
return undefined;
}
if (MEDIA_PLACEHOLDER_RE.test(trimmed)) {
return undefined;
}
const cleaned = trimmed.replace(MEDIA_PLACEHOLDER_TOKEN_RE, "" ).trim();
return cleaned || undefined;
}
function formatSection(
title: string,
kind: "Transcript" | "Description" ,
text: string,
userText?: string,
): string {
const lines = [`[${title}]`];
if (userText) {
lines.push(`User text:\n${userText}`);
}
lines.push(`${kind}:\n${text}`);
return lines.join("\n" );
}
export function formatMediaUnderstandingBody(params: {
body?: string;
outputs: MediaUnderstandingOutput[];
}): string {
const outputs = params.outputs.filter((output) => output.text.trim());
if (outputs.length === 0 ) {
return params.body ?? "" ;
}
const userText = extractMediaUserText(params.body);
const sections: string[] = [];
if (userText && outputs.length > 1 ) {
sections.push(`User text:\n${userText}`);
}
const counts = new Map<MediaUnderstandingOutput["kind" ], number>();
for (const output of outputs) {
counts.set(output.kind, (counts.get(output.kind) ?? 0 ) + 1 );
}
const seen = new Map<MediaUnderstandingOutput["kind" ], number>();
for (const output of outputs) {
const count = counts.get(output.kind) ?? 1 ;
const next = (seen.get(output.kind) ?? 0 ) + 1 ;
seen.set(output.kind, next);
const suffix = count > 1 ? ` ${next}/${count}` : "" ;
if (output.kind === "audio.transcription" ) {
sections.push(
formatSection(
`Audio${suffix}`,
"Transcript" ,
output.text,
outputs.length === 1 ? userText : undefined,
),
);
continue ;
}
if (output.kind === "image.description" ) {
sections.push(
formatSection(
`Image${suffix}`,
"Description" ,
output.text,
outputs.length === 1 ? userText : undefined,
),
);
continue ;
}
sections.push(
formatSection(
`Video${suffix}`,
"Description" ,
output.text,
outputs.length === 1 ? userText : undefined,
),
);
}
return sections.join("\n\n" ).trim();
}
export function formatAudioTranscripts(outputs: MediaUnderstandingOutput[]): string {
if (outputs.length === 1 ) {
return outputs[0 ].text;
}
return outputs.map((output, index) => `Audio ${index + 1 }:\n${output.text}`).join("\n\n" );
}
Messung V0.5 in Prozent C=100 H=91 G=95
¤ Dauer der Verarbeitung: 0.12 Sekunden
(vorverarbeitet am 2026-06-09)
¤
*© Formatika GbR, Deutschland