import { Type } from "typebox"; import { loadConfig } from "../../config/config.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { formatErrorMessage } from "../../infra/errors.js"; import type { SsrFPolicy } from "../../infra/net/ssrf.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js"; import {
classifyMediaReferenceSource,
normalizeMediaReferenceSource,
} from "../../media/media-reference.js"; import { saveMediaBuffer } from "../../media/store.js"; import { loadWebMedia } from "../../media/web-media.js"; import { readSnakeCaseParamRaw } from "../../param-key.js"; import { resolveUserPath } from "../../utils.js"; import type { DeliveryContext } from "../../utils/delivery-context.js"; import {
resolveVideoGenerationMode,
resolveVideoGenerationModeCapabilities,
} from "../../video-generation/capabilities.js"; import { parseVideoGenerationModelRef } from "../../video-generation/model-ref.js"; import {
generateVideo,
listRuntimeVideoGenerationProviders,
} from "../../video-generation/runtime.js"; import type {
VideoGenerationIgnoredOverride,
VideoGenerationProvider,
VideoGenerationResolution,
VideoGenerationSourceAsset,
} from "../../video-generation/types.js"; import { ToolInputError, readNumberParam, readStringParam } from "./common.js"; import { decodeDataUrl } from "./image-tool.helpers.js"; import {
applyVideoGenerationModelConfigDefaults,
buildMediaReferenceDetails,
buildTaskRunDetails,
normalizeMediaReferenceInputs,
readBooleanToolParam,
readGenerationTimeoutMs,
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
resolveRemoteMediaSsrfPolicy,
resolveSelectedCapabilityProvider,
} from "./media-tool-shared.js"; import { type ToolModelConfig } from "./model-config.helpers.js"; import {
createSandboxBridgeReadFile,
resolveSandboxedBridgeMediaPath,
type AnyAgentTool,
type SandboxFsBridge,
type ToolFsPolicy,
} from "./tool-runtime.helpers.js"; import {
completeVideoGenerationTaskRun,
createVideoGenerationTaskRun,
failVideoGenerationTaskRun,
recordVideoGenerationTaskProgress,
type VideoGenerationTaskHandle,
wakeVideoGenerationTaskCompletion,
} from "./video-generate-background.js"; import {
createVideoGenerateDuplicateGuardResult,
createVideoGenerateListActionResult,
createVideoGenerateStatusActionResult,
} from "./video-generate-tool.actions.js";
/** Subsystem logger scoped to the video-generate agent tool. */
const log = createSubsystemLogger("agents/tools/video-generate");

/**
 * Tool-level ceilings for reference inputs. The image and video limits are
 * also used as fallbacks when a provider does not declare its own maximum.
 */
const MAX_INPUT_IMAGES = 9;
const MAX_INPUT_VIDEOS = 4;
const MAX_INPUT_AUDIOS = 3;

/** Aspect-ratio values accepted at the tool boundary. */
const SUPPORTED_ASPECT_RATIOS = new Set([
  "1:1",
  "2:3",
  "3:2",
  "3:4",
  "4:3",
  "4:5",
  "5:4",
  "9:16",
  "16:9",
  "21:9",
  // Provider-specific sentinel: accepted at the tool boundary, then forwarded
  // to the active provider only if that provider declares "adaptive" in its
  // capabilities.aspectRatios list. Providers that do not declare it see the
  // value pushed into `ignoredOverrides` in the normalization layer so the
  // tool surfaces a user-visible "ignored override" warning rather than
  // silently dropping the request. Seedance uses this to auto-detect the
  // ratio from input image dimensions.
  // NOTE: this entry must stay on its own line; when it shared a line with the
  // comment above, the `//` swallowed it and "adaptive" dropped out of the set.
  "adaptive",
]);
/**
 * Parameter schema for the `video_generate` tool.
 *
 * Every field is optional. The small builders below only remove repetition;
 * they produce the same TypeBox nodes as spelling out
 * `Type.Optional(Type.X({ description }))` for each property.
 */
const VideoGenerateToolSchema = (() => {
  // Optional free-form string parameter.
  const optionalText = (description: string) => Type.Optional(Type.String({ description }));
  // Optional list-of-strings parameter.
  const optionalTextList = (description: string) =>
    Type.Optional(Type.Array(Type.String(), { description }));
  // Optional boolean switch.
  const optionalToggle = (description: string) => Type.Optional(Type.Boolean({ description }));
  // Optional number that must be >= 1.
  const optionalNumberAtLeastOne = (description: string) =>
    Type.Optional(Type.Number({ description, minimum: 1 }));
  // Glues description fragments together without inserting separators.
  const concat = (...fragments: string[]) => fragments.join("");

  return Type.Object({
    action: optionalText(
      'Optional action: "generate" (default), "status" to inspect the active session task, or "list" to inspect available providers/models.',
    ),
    prompt: optionalText("Video generation prompt."),

    // --- reference images ---
    image: optionalText("Optional single reference image path or URL."),
    images: optionalTextList(`Optional reference images (up to ${MAX_INPUT_IMAGES}).`),
    imageRoles: optionalTextList(
      concat(
        "Optional semantic roles for the combined reference image list, parallel by index. ",
        "The list is `image` (if provided) followed by each entry in `images`, in order, ",
        "after de-duplication. ",
        'Canonical values: "first_frame", "last_frame", "reference_image". ',
        "Providers may accept additional role strings. ",
        "Must not have more entries than the combined image list; use an empty string to leave a position unset.",
      ),
    ),

    // --- reference videos ---
    video: optionalText("Optional single reference video path or URL."),
    videos: optionalTextList(`Optional reference videos (up to ${MAX_INPUT_VIDEOS}).`),
    videoRoles: optionalTextList(
      concat(
        "Optional semantic roles for the combined reference video list, parallel by index. ",
        "The list is `video` (if provided) followed by each entry in `videos`, in order, ",
        "after de-duplication. ",
        'Canonical value: "reference_video". Providers may accept additional role strings. ',
        "Must not have more entries than the combined video list; use an empty string to leave a position unset.",
      ),
    ),

    // --- reference audio ---
    audioRef: optionalText("Optional single reference audio path or URL (e.g. background music)."),
    audioRefs: optionalTextList(`Optional reference audios (up to ${MAX_INPUT_AUDIOS}).`),
    audioRoles: optionalTextList(
      concat(
        "Optional semantic roles for the combined reference audio list, parallel by index. ",
        "The list is `audioRef` (if provided) followed by each entry in `audioRefs`, in order, ",
        "after de-duplication. ",
        'Canonical value: "reference_audio". Providers may accept additional role strings. ',
        "Must not have more entries than the combined audio list; use an empty string to leave a position unset.",
      ),
    ),

    // --- generation overrides ---
    model: optionalText("Optional provider/model override, e.g. qwen/wan2.6-t2v."),
    filename: optionalText(
      "Optional output filename hint. OpenClaw preserves the basename and saves under its managed media directory.",
    ),
    size: optionalText(
      "Optional size hint like 1280x720 or 1920x1080 when the provider supports it.",
    ),
    aspectRatio: optionalText(
      'Optional aspect ratio hint: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or "adaptive".',
    ),
    resolution: optionalText("Optional resolution hint: 480P, 720P, 768P, or 1080P."),
    durationSeconds: optionalNumberAtLeastOne(
      "Optional target duration in seconds. OpenClaw may round this to the nearest provider-supported duration.",
    ),
    audio: optionalToggle("Optional audio toggle when the provider supports generated audio."),
    watermark: optionalToggle("Optional watermark toggle when the provider supports it."),
    providerOptions: Type.Optional(
      Type.Record(Type.String(), Type.Unknown(), {
        description: concat(
          'Optional provider-specific options as a JSON object, e.g. `{"seed": 42, "draft": true}`. ',
          "Each provider declares its own accepted keys and primitive types (number/boolean/string) ",
          "via its capabilities; unknown keys or type mismatches skip the candidate during fallback ",
          "and never silently reach the wrong provider. Run `video_generate action=list` to see which ",
          "keys each provider accepts.",
        ),
      }),
    ),
    timeoutMs: optionalNumberAtLeastOne("Optional provider request timeout in milliseconds."),
  });
})();
/**
 * Validates the requested reference-input counts against the capabilities the
 * selected provider declares for the resolved generation mode.
 *
 * Only the image and video counts are checked here. The remaining fields
 * (`model`, `size`, `aspectRatio`, `resolution`, `durationSeconds`, `audio`,
 * `watermark`, `inputAudioCount`) are accepted but not read by this function.
 *
 * @param params.provider - Provider to validate against; when undefined the
 *   function returns without checking anything.
 * @param params.inputImageCount - Number of reference images in the request.
 * @param params.inputVideoCount - Number of reference videos in the request.
 * @throws ToolInputError when the provider does not support the requested
 *   image-to-video / video-to-video mode, or when a reference count exceeds
 *   the provider's (or the tool's default) maximum.
 */
function validateVideoGenerationCapabilities(params: {
  provider: VideoGenerationProvider | undefined;
  model?: string;
  inputImageCount: number;
  inputVideoCount: number;
  inputAudioCount: number;
  size?: string;
  aspectRatio?: string;
  resolution?: VideoGenerationResolution;
  durationSeconds?: number;
  audio?: boolean;
  watermark?: boolean;
}): void {
  const provider = params.provider;
  if (!provider) {
    return;
  }
  const mode = resolveVideoGenerationMode({
    inputImageCount: params.inputImageCount,
    inputVideoCount: params.inputVideoCount,
  });
  const { capabilities: caps } = resolveVideoGenerationModeCapabilities({
    provider,
    inputImageCount: params.inputImageCount,
    inputVideoCount: params.inputVideoCount,
  });

  // No capability entry for the resolved mode: reject pure image-only or
  // video-only reference requests, otherwise there is nothing to validate.
  if (!caps && mode === "imageToVideo" && params.inputVideoCount === 0) {
    throw new ToolInputError(`${provider.id} does not support image-to-video reference inputs.`);
  }
  if (!caps && mode === "videoToVideo" && params.inputImageCount === 0) {
    throw new ToolInputError(`${provider.id} does not support video-to-video reference inputs.`);
  }
  if (!caps) {
    return;
  }

  // A capability entry exists but is explicitly disabled for this mode.
  if (
    mode === "imageToVideo" &&
    "enabled" in caps &&
    !caps.enabled &&
    params.inputVideoCount === 0
  ) {
    throw new ToolInputError(`${provider.id} does not support image-to-video reference inputs.`);
  }
  if (
    mode === "videoToVideo" &&
    "enabled" in caps &&
    !caps.enabled &&
    params.inputImageCount === 0
  ) {
    throw new ToolInputError(`${provider.id} does not support video-to-video reference inputs.`);
  }

  // Per-kind count limits; fall back to the tool-wide ceilings when the
  // provider does not declare its own maximum.
  if (params.inputImageCount > 0) {
    const maxInputImages = caps.maxInputImages ?? MAX_INPUT_IMAGES;
    if (params.inputImageCount > maxInputImages) {
      throw new ToolInputError(
        `${provider.id} supports at most ${maxInputImages} reference image${maxInputImages === 1 ? "" : "s"}.`,
      );
    }
  }
  if (params.inputVideoCount > 0) {
    const maxInputVideos = caps.maxInputVideos ?? MAX_INPUT_VIDEOS;
    if (params.inputVideoCount > maxInputVideos) {
      throw new ToolInputError(
        `${provider.id} supports at most ${maxInputVideos} reference video${maxInputVideos === 1 ? "" : "s"}.`,
      );
    }
  }

  // Audio-count validation is intentionally deferred to runtime.ts (generateVideo).
  // The runtime guard skips per-candidate providers that lack audio support, allowing
  // fallback candidates that do support audio to run. A ToolInputError here would fire
  // against only the primary provider and prevent valid fallback-based audio requests.
  //
  // maxDurationSeconds validation is intentionally deferred to runtime.ts (generateVideo).
  // The runtime guard skips per-candidate providers whose hard cap is below the requested
  // duration, allowing a fallback with a higher cap to run — same rationale as the audio
  // check above. When providers declare an explicit supportedDurationSeconds list, runtime
  // normalization snaps to the nearest valid value instead of skipping.
}
/**
 * Renders an ignored override as a `key=value` token.
 *
 * @param override - The override entry to render.
 * @returns The override's key, an equals sign, and the stringified value.
 */
function formatIgnoredVideoGenerationOverride(override: VideoGenerationIgnoredOverride): string {
  const { key, value } = override;
  const renderedValue = String(value);
  return `${key}=${renderedValue}`;
}
/**
 * Sandbox settings for the video-generate tool: a `root` string paired with
 * the `SandboxFsBridge` used to reach the sandbox.
 */
type VideoGenerateSandboxConfig = {
// NOTE(review): presumably the sandbox root directory path — confirm against callers.
root: string;
// File-system bridge handle (type imported from ./tool-runtime.helpers.js).
bridge: SandboxFsBridge;
};
/**
 * Callback that receives an async unit of work (`() => Promise<void>`) and
 * returns nothing itself.
 * NOTE(review): presumably used to run generation work in the background —
 * confirm against the call sites.
 */
type VideoGenerateBackgroundScheduler = (work: () => Promise<void>) => void;
/*
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */