/** * Voice call response generator - uses the embedded Pi agent for tool support. * Routes voice responses through the same agent infrastructure as messaging.
*/
import crypto from "node:crypto"; import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/config-runtime"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import type { SessionEntry } from "../api.js"; import type { VoiceCallConfig } from "./config.js"; import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js"; import { resolveVoiceResponseModel } from "./response-model.js";
/**
 * Output-format instructions appended to the end of the voice system prompt.
 *
 * The text asks the model to reply with a single JSON object of the form
 * `{"spoken":"..."}`. It is one newline-separated string: a heading line
 * followed by four bullet lines.
 */
const VOICE_SPOKEN_OUTPUT_CONTRACT: string =
  "Output format requirements:" +
  "\n" +
  '- Return only valid JSON in this exact shape: {"spoken":"..."}' +
  "\n" +
  "- Do not include markdown, code fences, planning text, or extra keys." +
  "\n" +
  '- Put exactly what should be spoken to the caller into "spoken".' +
  "\n" +
  '- If there is nothing to say, return {"spoken":""}.';
/**
 * Heuristically decides whether a paragraph looks like model meta-reasoning
 * (planning or analysis text) rather than a reply.
 *
 * The paragraph is normalized via `normalizeLowercaseStringOrEmpty` and then
 * compared against a fixed set of lowercase markers:
 * - starts with "thinking process", "reasoning:" or "analysis:";
 * - starts with "the user " and also contains "i should", "i need to" or
 *   "i will";
 * - contains "this is a natural continuation of the conversation" or
 *   "keep the conversation flowing".
 *
 * @param paragraph - A single paragraph of text to classify.
 * @returns `true` when any marker matches; `false` otherwise, including when
 *   the normalized paragraph is empty.
 */
function isLikelyMetaReasoningParagraph(paragraph: string): boolean {
  // NOTE(review): the helper presumably trims and lowercases, returning "" for
  // empty input; all markers below are lowercase on that assumption. Confirm.
  const lower = normalizeLowercaseStringOrEmpty(paragraph);
  if (!lower) {
    return false;
  }

  if (lower.startsWith("thinking process")) {
    return true;
  }
  if (lower.startsWith("reasoning:") || lower.startsWith("analysis:")) {
    return true;
  }

  // Third-person narration about the caller only counts when it is paired
  // with a first-person statement of intent.
  if (
    lower.startsWith("the user ") &&
    (lower.includes("i should") || lower.includes("i need to") || lower.includes("i will"))
  ) {
    return true;
  }

  if (
    lower.includes("this is a natural continuation of the conversation") ||
    lower.includes("keep the conversation flowing")
  ) {
    return true;
  }

  return false;
}
function sanitizePlainSpokenText(text: string): string | null { const withoutCodeFences = text.replace(/```[\s\S]*?```/g, " ").trim(); if (!withoutCodeFences) { returnnull;
}
/**
 * Produces an agent-scoped session key.
 *
 * The incoming key is trimmed. If it already begins with `agent:` (compared
 * case-insensitively) the trimmed key is returned as-is, keeping its original
 * casing; otherwise it is wrapped as `agent:<agentId>:<key>`.
 *
 * @param agentId - Agent identifier used when a prefix has to be added.
 * @param sessionKey - Raw session key, possibly already agent-scoped.
 * @returns The trimmed key, prefixed with `agent:<agentId>:` when needed.
 */
function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): string {
  const key = sessionKey.trim();
  const hasAgentPrefix = key.toLowerCase().startsWith("agent:");
  return hasAgentPrefix ? key : `agent:${agentId}:${key}`;
}
/** * Generate a voice response using the embedded Pi agent with full tool support. * Uses the same agent infrastructure as messaging for consistent behavior.
*/
export async function generateVoiceResponse(
params: VoiceResponseParams,
): Promise<VoiceResponseResult> { const { voiceConfig, callId, from, transcript, userMessage, coreConfig, agentRuntime } = params;
if (!coreConfig) { return { text: null, error: "Core config unavailable for voice response" };
} const cfg = coreConfig;
// Build voice-specific session key based on phone number const normalizedPhone = from.replace(/\D/g, ""); const sessionKey = `voice:${normalizedPhone}`; const agentId = voiceConfig.agentId ?? "main";
// Build system prompt with conversation history const basePrompt =
voiceConfig.responseSystemPrompt ??
`You are ${agentName}, a helpful voice assistant on a phone call. Keep responses brief and conversational (1-2 sentences max). Be natural and friendly. The caller's phone number is ${from}. You have access to tools - use them when helpful.`;
let extraSystemPrompt = basePrompt; if (transcript.length > 0) { const history = transcript
.map((entry) => `${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`)
.join("\n");
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
}
extraSystemPrompt = `${extraSystemPrompt}\n\n${VOICE_SPOKEN_OUTPUT_CONTRACT}`;
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert. 0.19 Bemerkung:
(vorverarbeitet am 2026-06-07)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.