import type { OpenClawConfig } from "../config/types.openclaw.js" ;
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js" ;
import type { SsrFPolicy } from "../infra/net/ssrf.js" ;
import { logWarn } from "../logger.js" ;
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
} from "../shared/string-coerce.js" ;
import { canonicalizeBase64, estimateBase64DecodedBytes } from "./base64.js" ;
import { convertHeicToJpeg } from "./image-ops.js" ;
import { detectMime } from "./mime.js" ;
import { extractPdfContent, type PdfExtractedImage } from "./pdf-extract.js" ;
import { readResponseWithLimit } from "./read-response-with-limit.js" ;
export type InputImageContent = PdfExtractedImage;
export type InputFileExtractResult = {
filename: string;
text?: string;
images?: InputImageContent[];
};
export type InputPdfLimits = {
maxPages: number;
maxPixels: number;
minTextChars: number;
};
export type InputFileLimits = {
allowUrl: boolean ;
urlAllowlist?: string[];
allowedMimes: Set<string>;
maxBytes: number;
maxChars: number;
maxRedirects: number;
timeoutMs: number;
pdf: InputPdfLimits;
};
export type InputFileLimitsConfig = {
allowUrl?: boolean ;
allowedMimes?: string[];
maxBytes?: number;
maxChars?: number;
maxRedirects?: number;
timeoutMs?: number;
pdf?: {
maxPages?: number;
maxPixels?: number;
minTextChars?: number;
};
};
export type InputImageLimits = {
allowUrl: boolean ;
urlAllowlist?: string[];
allowedMimes: Set<string>;
maxBytes: number;
maxRedirects: number;
timeoutMs: number;
};
export type InputImageSource =
| {
type: "base64" ;
data: string;
mediaType?: string;
}
| {
type: "url" ;
url: string;
mediaType?: string;
};
export type InputFileSource =
| {
type: "base64" ;
data: string;
mediaType?: string;
filename?: string;
}
| {
type: "url" ;
url: string;
mediaType?: string;
filename?: string;
};
export type InputFetchResult = {
buffer: Buffer;
mimeType: string;
contentType?: string;
};
export const DEFAULT_INPUT_IMAGE_MIMES = [
"image/jpeg" ,
"image/png" ,
"image/gif" ,
"image/webp" ,
"image/heic" ,
"image/heif" ,
];
export const DEFAULT_INPUT_FILE_MIMES = [
"text/plain" ,
"text/markdown" ,
"text/html" ,
"text/csv" ,
"application/json" ,
"application/pdf" ,
];
export const DEFAULT_INPUT_IMAGE_MAX_BYTES = 10 * 1024 * 1024 ;
export const DEFAULT_INPUT_FILE_MAX_BYTES = 5 * 1024 * 1024 ;
export const DEFAULT_INPUT_FILE_MAX_CHARS = 60 _000 ;
export const DEFAULT_INPUT_MAX_REDIRECTS = 3 ;
export const DEFAULT_INPUT_TIMEOUT_MS = 10 _000 ;
export const DEFAULT_INPUT_PDF_MAX_PAGES = 4 ;
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4 _000 _000 ;
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200 ;
const NORMALIZED_INPUT_IMAGE_MIME = "image/jpeg" ;
const HEIC_INPUT_IMAGE_MIMES = new Set(["image/heic" , "image/heif" ]);
function rejectOversizedBase64Payload(params: {
data: string;
maxBytes: number;
label: "Image" | "File" ;
}): void {
const estimated = estimateBase64DecodedBytes(params.data);
if (estimated > params.maxBytes) {
throw new Error(
`${params.label} too large: ${estimated} bytes (limit: ${params.maxBytes} bytes)`,
);
}
}
export function normalizeMimeType(value: string | undefined): string | undefined {
const [raw] = value?.split(";" ) ?? [];
return normalizeOptionalLowercaseString(raw);
}
export function parseContentType(value: string | undefined): {
mimeType?: string;
charset?: string;
} {
if (!value) {
return {};
}
const parts = value.split(";" ).map((part) => part.trim());
const mimeType = normalizeMimeType(parts[0 ]);
const charset = parts
.map((part) => normalizeOptionalString(part.match(/^charset=(.+)$/i)?.[1 ]))
.find((part) => part && part.length > 0 );
return { mimeType, charset };
}
export function normalizeMimeList(values: string[] | undefined, fallback: string[]): Set<string> {
const input = values && values.length > 0 ? values : fallback;
return new Set(input.map((value) => normalizeMimeType(value)).filter(Boolean ) as string[]);
}
export function resolveInputFileLimits(config?: InputFileLimitsConfig): InputFileLimits {
return {
allowUrl: config?.allowUrl ?? true ,
allowedMimes: normalizeMimeList(config?.allowedMimes, DEFAULT_INPUT_FILE_MIMES),
maxBytes: config?.maxBytes ?? DEFAULT_INPUT_FILE_MAX_BYTES,
maxChars: config?.maxChars ?? DEFAULT_INPUT_FILE_MAX_CHARS,
maxRedirects: config?.maxRedirects ?? DEFAULT_INPUT_MAX_REDIRECTS,
timeoutMs: config?.timeoutMs ?? DEFAULT_INPUT_TIMEOUT_MS,
pdf: {
maxPages: config?.pdf?.maxPages ?? DEFAULT_INPUT_PDF_MAX_PAGES,
maxPixels: config?.pdf?.maxPixels ?? DEFAULT_INPUT_PDF_MAX_PIXELS,
minTextChars: config?.pdf?.minTextChars ?? DEFAULT_INPUT_PDF_MIN_TEXT_CHARS,
},
};
}
export async function fetchWithGuard(params: {
url: string;
maxBytes: number;
timeoutMs: number;
maxRedirects: number;
policy?: SsrFPolicy;
auditContext?: string;
}): Promise<InputFetchResult> {
const { response, release } = await fetchWithSsrFGuard({
url: params.url,
maxRedirects: params.maxRedirects,
timeoutMs: params.timeoutMs,
policy: params.policy,
auditContext: params.auditContext,
init: { headers: { "User-Agent" : "OpenClaw-Gateway/1.0" } },
});
try {
if (!response.ok) {
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get("content-length" );
if (contentLength) {
const size = Number(contentLength);
if (Number.isFinite(size) && size > params.maxBytes) {
throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`);
}
}
const buffer = await readResponseWithLimit(response, params.maxBytes);
const contentType = response.headers.get("content-type" ) || undefined;
const parsed = parseContentType(contentType);
const mimeType = parsed.mimeType ?? "application/octet-stream" ;
return { buffer, mimeType, contentType };
} finally {
await release();
}
}
function decodeTextContent(buffer: Buffer, charset: string | undefined): string {
const encoding = normalizeOptionalLowercaseString(charset) || "utf-8" ;
try {
return new TextDecoder(encoding).decode(buffer);
} catch {
return new TextDecoder("utf-8" ).decode(buffer);
}
}
function clampText(text: string, maxChars: number): string {
if (text.length <= maxChars) {
return text;
}
return text.slice(0 , maxChars);
}
async function normalizeInputImage(params: {
buffer: Buffer;
mimeType?: string;
limits: InputImageLimits;
}): Promise<InputImageContent> {
const declaredMime = normalizeMimeType(params.mimeType) ?? "application/octet-stream" ;
const detectedMime = normalizeMimeType(
await detectMime({ buffer: params.buffer, headerMime: params.mimeType }),
);
if (declaredMime.startsWith("image/" ) && detectedMime && !detectedMime.startsWith("image/" )) {
throw new Error(`Unsupported image MIME type: ${detectedMime}`);
}
const sourceMime =
(detectedMime && HEIC_INPUT_IMAGE_MIMES.has(detectedMime)) ||
(HEIC_INPUT_IMAGE_MIMES.has(declaredMime) && !detectedMime)
? (detectedMime ?? declaredMime)
: declaredMime;
if (!params.limits.allowedMimes.has(sourceMime)) {
throw new Error(`Unsupported image MIME type: ${sourceMime}`);
}
if (!HEIC_INPUT_IMAGE_MIMES.has(sourceMime)) {
return {
type: "image" ,
data: params.buffer.toString("base64" ),
mimeType: sourceMime,
};
}
const normalizedBuffer = await convertHeicToJpeg(params.buffer);
if (normalizedBuffer.byteLength > params.limits.maxBytes) {
throw new Error(
`Image too large after HEIC conversion: ${normalizedBuffer.byteLength} bytes (limit: ${params.limits.maxBytes} bytes)`,
);
}
return {
type: "image" ,
data: normalizedBuffer.toString("base64" ),
mimeType: NORMALIZED_INPUT_IMAGE_MIME,
};
}
export async function extractImageContentFromSource(
source: InputImageSource,
limits: InputImageLimits,
): Promise<InputImageContent> {
if (source.type === "base64" ) {
rejectOversizedBase64Payload({ data: source.data, maxBytes: limits.maxBytes, label: "Image" });
const canonicalData = canonicalizeBase64(source.data);
if (!canonicalData) {
throw new Error("input_image base64 source has invalid 'data' field" );
}
const buffer = Buffer.from(canonicalData, "base64" );
if (buffer.byteLength > limits.maxBytes) {
throw new Error(
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`,
);
}
return await normalizeInputImage({
buffer,
mimeType: normalizeMimeType(source.mediaType) ?? "image/png" ,
limits,
});
}
if (source.type === "url" ) {
if (!limits.allowUrl) {
throw new Error("input_image URL sources are disabled by config" );
}
const result = await fetchWithGuard({
url: source.url,
maxBytes: limits.maxBytes,
timeoutMs: limits.timeoutMs,
maxRedirects: limits.maxRedirects,
policy: {
allowPrivateNetwork: false ,
hostnameAllowlist: limits.urlAllowlist,
},
auditContext: "openresponses.input_image" ,
});
return await normalizeInputImage({
buffer: result.buffer,
mimeType: result.mimeType,
limits,
});
}
throw new Error(`Unsupported input_image source type: ${(source as { type: string }).type}`);
}
export async function extractFileContentFromSource(params: {
source: InputFileSource;
limits: InputFileLimits;
config?: OpenClawConfig;
}): Promise<InputFileExtractResult> {
const { source, limits } = params;
const filename = source.filename || "file" ;
let buffer: Buffer;
let mimeType: string | undefined;
let charset: string | undefined;
if (source.type === "base64" ) {
rejectOversizedBase64Payload({ data: source.data, maxBytes: limits.maxBytes, label: "File" });
const canonicalData = canonicalizeBase64(source.data);
if (!canonicalData) {
throw new Error("input_file base64 source has invalid 'data' field" );
}
const parsed = parseContentType(source.mediaType);
mimeType = parsed.mimeType;
charset = parsed.charset;
buffer = Buffer.from(canonicalData, "base64" );
} else {
if (!limits.allowUrl) {
throw new Error("input_file URL sources are disabled by config" );
}
const result = await fetchWithGuard({
url: source.url,
maxBytes: limits.maxBytes,
timeoutMs: limits.timeoutMs,
maxRedirects: limits.maxRedirects,
policy: {
allowPrivateNetwork: false ,
hostnameAllowlist: limits.urlAllowlist,
},
auditContext: "openresponses.input_file" ,
});
const parsed = parseContentType(result.contentType);
mimeType = parsed.mimeType ?? normalizeMimeType(result.mimeType);
charset = parsed.charset;
buffer = result.buffer;
}
if (buffer.byteLength > limits.maxBytes) {
throw new Error(`File too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`);
}
if (!mimeType) {
throw new Error("input_file missing media type" );
}
if (!limits.allowedMimes.has(mimeType)) {
throw new Error(`Unsupported file MIME type: ${mimeType}`);
}
if (mimeType === "application/pdf" ) {
const extracted = await extractPdfContent({
buffer,
maxPages: limits.pdf.maxPages,
maxPixels: limits.pdf.maxPixels,
minTextChars: limits.pdf.minTextChars,
...(params.config ? { config: params.config } : {}),
onImageExtractionError: (err) => {
logWarn(`media: PDF image extraction skipped, ${String(err)}`);
},
});
const text = extracted.text ? clampText(extracted.text, limits.maxChars) : "" ;
return {
filename,
text,
images: extracted.images.length > 0 ? extracted.images : undefined,
};
}
const text = clampText(decodeTextContent(buffer, charset), limits.maxChars);
return { filename, text };
}
Messung V0.5 in Prozent C=99 H=96 G=97
¤ Dauer der Verarbeitung: 0.15 Sekunden
(vorverarbeitet am 2026-06-10)
¤
*© Formatika GbR, Deutschland