import path from "node:path"; import type { ImageContent } from "@mariozechner/pi-ai"; import { formatErrorMessage } from "../../../infra/errors.js"; import { assertNoWindowsNetworkPath, safeFileURLToPath } from "../../../infra/local-file-access.js"; import type { PromptImageOrderEntry } from "../../../media/prompt-image-order.js"; import { loadWebMedia } from "../../../media/web-media.js"; import { normalizeLowercaseStringOrEmpty } from "../../../shared/string-coerce.js"; import { resolveUserPath } from "../../../utils.js"; import type { ImageSanitizationLimits } from "../../image-sanitization.js"; import {
createSandboxBridgeReadFile,
resolveSandboxedBridgeMediaPath,
} from "../../sandbox-media-paths.js"; import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js"; import { sanitizeImageBlocks } from "../../tool-images.js"; import { log } from "../logger.js";
/**
 * Matches the opaque media URI written by the Gateway's claim-check offload:
 *   media://inbound/<uuid-or-id>
 *
 * Uses an exclusion-based character class rather than a whitelist so that
 * Unicode filenames (e.g. Chinese characters) preserved by sanitizeFilename
 * in store.ts are matched correctly.
 *
 * Explicitly excluded from the ID segment:
 *   ]     — closes the surrounding [media attached: ...] bracket
 *   \s    — any whitespace (space, newline, tab) — terminates the token
 *   /     — forward slash path separator (traversal prevention)
 *   \     — back slash path separator (traversal prevention)
 *   \x00  — null byte (path injection prevention)
 *
 * resolveMediaBufferPath applies its own guards against these characters, but
 * excluding them here provides defence-in-depth at the parsing layer.
 *
 * Capture group 1 holds the ID segment. The pattern has no `g` flag, so it
 * keeps no `lastIndex` state between calls.
 *
 * Example valid IDs:
 *   "1c77ce17-20b9-4546-be64-6e36a9adcb2c.png"
 *   "photo---1c77ce17-20b9-4546-be64-6e36a9adcb2c.png"
 *   "图片---1c77ce17-20b9-4546-be64-6e36a9adcb2c.png"
 */
// The lint directive must stay on its own line: a `//` comment runs to the end
// of the physical line and would otherwise comment out the declaration.
// eslint-disable-next-line no-control-regex
const MEDIA_URI_REGEX = /\bmedia:\/\/inbound\/([^\]\s/\\\x00]+)/;
/**
 * An image reference discovered while scanning text.
 */
export interface DetectedImageRef {
  /** Exact string that was matched in the prompt. */
  raw: string;

  /** Kind of reference that was found. */
  type: "path" | "media-uri";

  /**
   * For `"path"` references: the resolved/normalized path.
   * For `"media-uri"` references: the raw media URI.
   */
  resolved: string;
}
/**
 * Reports whether a path's file extension is a known image extension.
 *
 * @param filePath Path whose extension (as returned by `path.extname`) is inspected
 * @returns True when the normalized extension is a member of IMAGE_EXTENSIONS
 */
function isImageExtension(filePath: string): boolean {
  const rawExtension = path.extname(filePath);
  // Normalize before the lookup so the set membership test sees a canonical form.
  const normalizedExtension = normalizeLowercaseStringOrEmpty(rawExtension);
  return IMAGE_EXTENSIONS.has(normalizedExtension);
}
// Pattern for [media attached: path (type) | url] or [media attached N/M: path (type) | url] format
// Each bracket = ONE file. The | separates path from URL, not multiple files.
// Multi-file format uses separate brackets on separate lines.
MEDIA_ATTACHED_PATTERN.lastIndex = 0;
MESSAGE_IMAGE_PATTERN.lastIndex = 0;
FILE_URL_PATTERN.lastIndex = 0;
PATH_PATTERN.lastIndex = 0;
let match: RegExpExecArray | null; while ((match = MEDIA_ATTACHED_PATTERN.exec(prompt)) !== null) { const content = match[1];
// Skip "[media attached: N files]" header lines if (/^\d+\s+files?$/i.test(content.trim())) { continue;
}
// Check for a Gateway claim-check URI first (media://inbound/<id>). // This must be tested before the extension-based path regex because the // URI has no file extension suffix in its base form. const mediaUriMatch = content.match(MEDIA_URI_REGEX); if (mediaUriMatch) { const uri = `media://inbound/${mediaUriMatch[1]}`; const dedupeKey = normalizeRefForDedupe(uri); if (!seen.has(dedupeKey)) {
seen.add(dedupeKey);
refs.push({ raw: uri, type: "media-uri", resolved: uri });
} continue;
}
// Extract path before the (mime/type) or | delimiter // Format is: path (type) | url OR just: path (type) // Path may contain spaces (e.g., "ChatGPT Image Apr 21.png") // Use non-greedy .+? to stop at first image extension const pathMatch = content.match(MEDIA_ATTACHED_PATH_PATTERN); if (pathMatch?.[1]) {
addPathRef(pathMatch[1].trim());
}
}
// Pattern for [Image: source: /path/...] format from messaging systems while ((match = MESSAGE_IMAGE_PATTERN.exec(prompt)) !== null) { const raw = match[1]?.trim(); if (raw) {
addPathRef(raw);
}
}
// Remote HTTP(S) URLs are intentionally ignored. Native image injection is local-only.
// Pattern for file:// URLs - treat as paths since loadWebMedia handles them while ((match = FILE_URL_PATTERN.exec(prompt)) !== null) { const raw = match[0]; const dedupeKey = normalizeRefForDedupe(raw); if (seen.has(dedupeKey)) { continue;
}
seen.add(dedupeKey); // Use fileURLToPath for proper handling (e.g., file://localhost/path) try { const resolved = safeFileURLToPath(raw);
refs.push({ raw, type: "path", resolved });
} catch { // Skip malformed file:// URLs
}
}
// Pattern for file paths (absolute, relative, or home) // Matches: // - /absolute/path/to/file.ext (including paths with special chars like Messages/Attachments) // - ./relative/path.ext // - ../parent/path.ext // - ~/home/path.ext while ((match = PATH_PATTERN.exec(prompt)) !== null) { // Use capture group 1 (the path without delimiter prefix); skip if undefined if (match[1]) {
addPathRef(match[1]);
}
}
return refs;
}
/**
 * Loads an image from a file path and returns it as ImageContent.
 *
 * Errors raised inside the body are caught, logged at debug level, and
 * reported to the caller as `null` rather than propagated.
 *
 * @param ref The detected image reference
 * @param workspaceDir The current workspace directory for resolving relative paths
 * @param options Optional settings for sandbox and size limits
 * @returns The loaded image content, or null if loading failed or the file is not an image
 */
export async function loadImageFromRef(
  ref: DetectedImageRef,
  workspaceDir: string,
  options?: {
    maxBytes?: number;
    workspaceOnly?: boolean;
    sandbox?: { root: string; bridge: SandboxFsBridge };
  },
): Promise<ImageContent | null> {
  try {
    let targetPath = ref.resolved;

    // NOTE(review): `media` is not declared anywhere in this block as it stands.
    // The step that resolves `targetPath` (using `workspaceDir` / `options`) and
    // loads the file appears to be missing here — confirm and restore it.

    if (media.kind !== "image") {
      log.debug(`Native image: not an image file: ${targetPath} (got ${media.kind})`);
      // Must be a real `return`: without it, non-image media would fall through
      // and be encoded as if it were an image.
      return null;
    }

    // EXIF orientation is already normalized by loadWebMedia -> resizeToJpeg
    // Default to JPEG since optimization converts images to JPEG format
    const mimeType = media.contentType ?? "image/jpeg";
    const data = media.buffer.toString("base64");

    return { type: "image", data, mimeType };
  } catch (err) {
    // Log the actual error for debugging (size limits, network failures, etc.)
    log.debug(`Native image: failed to load ${ref.resolved}: ${formatErrorMessage(err)}`);
    return null;
  }
}
/**
 * Tells whether a model lists "image" among its input capabilities.
 *
 * @param model The model object with an optional input capability array
 * @returns True if `model.input` is present and includes "image"; false otherwise
 */
export function modelSupportsImages(model: { input?: string[] }): boolean {
  const declaredInputs = model.input;

  // A model without a declared input list is treated as not image-capable.
  if (declaredInputs == null) {
    return false;
  }

  return declaredInputs.includes("image");
}
/** * Detects and loads images referenced in a prompt for models with vision capability. * * This function scans the prompt for image references (file paths and URLs), * loads them, and returns them as ImageContent array ready to be passed to * the model's prompt method. * * @param params Configuration for image detection and loading * @returns Object with loaded images for current prompt only
*/
export async function detectAndLoadPromptImages(params: {
prompt: string;
workspaceDir: string;
model: { input?: string[] };
existingImages?: ImageContent[];
imageOrder?: PromptImageOrderEntry[];
maxBytes?: number;
maxDimensionPx?: number;
workspaceOnly?: boolean;
sandbox?: { root: string; bridge: SandboxFsBridge };
}): Promise<{ /** Images for the current prompt (existingImages + detected in current prompt) */
images: ImageContent[];
detectedRefs: DetectedImageRef[];
loadedCount: number;
skippedCount: number;
}> { // If model doesn't support images, return empty results if (!modelSupportsImages(params.model)) { return {
images: [],
detectedRefs: [],
loadedCount: 0,
skippedCount: 0,
};
}
// Detect images from current prompt const allRefs = detectImageReferences(params.prompt);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.