import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; import {
estimateTokens,
generateSummary as piGenerateSummary,
} from "@mariozechner/pi-coding-agent"; import type { AgentCompactionIdentifierPolicy } from "../config/types.agent-defaults.js"; import { formatErrorMessage } from "../infra/errors.js"; import { retryAsync } from "../infra/retry.js"; import { isAbortError } from "../infra/unhandled-rejections.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js"; import { isTimeoutError } from "./failover-error.js"; import { repairToolUseResultPairing, stripToolResultDetails } from "./session-transcript-repair.js"; import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js";
// Module-level logger created for the "compaction" subsystem; used below for warn-level
// messages when summarization attempts fail.
const log = createSubsystemLogger("compaction");
/** Default chunk ratio; `computeAdaptiveChunkRatio` returns it when no reduction applies. */
export const BASE_CHUNK_RATIO = 0.4;
/** Lower bound that `computeAdaptiveChunkRatio` never goes below. */
export const MIN_CHUNK_RATIO = 0.15;
/** 20% buffer for estimateTokens() inaccuracy. */
export const SAFETY_MARGIN = 1.2;
/** Summary text returned when there are no messages and no previous summary. */
const DEFAULT_SUMMARY_FALLBACK = "No prior history.";
/** Default number of parts for `splitMessagesByTokenShare`. */
const DEFAULT_PARTS = 2;
/** Instruction text for merging several partial summaries into one (newline-joined). */
const MERGE_SUMMARIES_INSTRUCTIONS = [
  "Merge these partial summaries into a single cohesive summary.",
  "",
  "MUST PRESERVE:",
  "- Active tasks and their current status (in-progress, blocked, pending)",
  "- Batch operation progress (e.g., '5/17 items completed')",
  "- The last thing the user requested and what was being done about it",
  "- Decisions made and their rationale",
  "- TODOs, open questions, and constraints",
  "- Any commitments or follow-ups promised",
  "",
  "PRIORITIZE recent context over older history. The agent needs to know",
  "what it was doing, not just what was discussed.",
].join("\n");
/** Instruction text asking that opaque identifiers be kept verbatim in summaries. */
const IDENTIFIER_PRESERVATION_INSTRUCTIONS =
  "Preserve all opaque identifiers exactly as written (no shortening or reconstruction), " +
  "including UUIDs, hashes, IDs, hostnames, IPs, ports, URLs, and file names.";
/**
 * Optional identifier-related settings for compaction summarization.
 * Both fields may be omitted.
 */
export type CompactionSummarizationInstructions = {
/** Identifier policy; allowed values are defined by `AgentCompactionIdentifierPolicy`. */
identifierPolicy?: AgentCompactionIdentifierPolicy;
/** NOTE(review): presumably custom identifier instruction text — confirm against the consumer. */
identifierInstructions?: string;
};
/**
 * Estimate the combined token count of a list of messages.
 *
 * The messages are first passed through `stripToolResultDetails`, then the
 * per-message `estimateTokens` results are added up.
 *
 * @param messages - Messages to measure.
 * @returns The summed estimate; 0 when the stripped list is empty.
 */
export function estimateMessagesTokens(messages: AgentMessage[]): number {
  // SECURITY: toolResult.details can contain untrusted/verbose payloads; never include in LLM-facing compaction.
  const safe = stripToolResultDetails(messages);
  return safe.reduce((sum, message) => sum + estimateTokens(message), 0);
}
/**
 * Token estimate for a single message.
 *
 * Delegates to `estimateMessagesTokens` with a one-element list so a lone
 * message is measured the same way as a batch.
 *
 * @param message - The message to measure.
 * @returns The estimated token count for that message.
 */
function estimateCompactionMessageTokens(message: AgentMessage): number {
  const singleton: AgentMessage[] = [message];
  return estimateMessagesTokens(singleton);
}
/**
 * Clamp a requested part count to a usable integer.
 *
 * @param parts - Requested number of parts; may be fractional or non-finite.
 * @param messageCount - Number of messages available to distribute.
 * @returns 1 when `parts` is non-finite or `<= 1`; otherwise `floor(parts)`
 *   capped at `max(1, messageCount)`.
 */
function normalizeParts(parts: number, messageCount: number): number {
  // NaN, +/-Infinity and anything at or below one all collapse to a single part.
  if (!Number.isFinite(parts) || parts <= 1) {
    return 1;
  }
  // Never ask for more parts than there are messages, but always allow at least one.
  const upperBound = Math.max(1, messageCount);
  return Math.min(Math.max(1, Math.floor(parts)), upperBound);
}
/**
 * Split a list of messages into chunks.
 *
 * Returns an empty array for an empty input, and a single chunk holding all
 * messages when the normalized part count is 1.
 *
 * @param messages - Messages to split.
 * @param parts - Requested number of chunks; defaults to `DEFAULT_PARTS` and is
 *   clamped by `normalizeParts`.
 * @returns An array of message chunks.
 */
export function splitMessagesByTokenShare(
messages: AgentMessage[],
parts = DEFAULT_PARTS,
): AgentMessage[][] { if (messages.length === 0) { return [];
} const normalizedParts = normalizeParts(parts, messages.length); if (normalizedParts <= 1) { return [messages];
}
// NOTE(review): the declarations of `chunks` and `currentChunk`, and the loop that
// fills them, are not visible in this excerpt. Presumably messages are distributed
// by estimated token share, as the function name suggests — confirm against the full file.
// Flush the trailing, non-empty chunk before returning.
if (currentChunk.length > 0) {
chunks.push(currentChunk);
}
return chunks;
}
/**
 * Compute adaptive chunk ratio based on average message size.
 * When messages are large, we use smaller chunks to avoid exceeding model limits.
 *
 * @param messages - Messages whose size drives the ratio.
 * @param contextWindow - Context window size used as the reference for "large".
 * @returns `BASE_CHUNK_RATIO` for an empty list or when no reduction applies;
 *   otherwise a reduced ratio that is never below `MIN_CHUNK_RATIO`.
 */
export function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number { if (messages.length === 0) { return BASE_CHUNK_RATIO;
}
// NOTE(review): the computation of `avgRatio` is not visible in this excerpt.
// Presumably it is the average per-message token estimate relative to
// `contextWindow` — confirm against the full file.
// NOTE(review): in this excerpt the line comment below and the `if` statement share
// one physical line; confirm they are on separate lines in the real file.
// The reduction is capped at BASE_CHUNK_RATIO - MIN_CHUNK_RATIO, and the result is
// floored at MIN_CHUNK_RATIO.
// If average message is > 10% of context, reduce chunk ratio if (avgRatio > 0.1) { const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO); return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
}
return BASE_CHUNK_RATIO;
}
/**
 * Decide whether one message is too large to be summarized safely.
 *
 * A message counts as oversized when its token estimate, padded by
 * `SAFETY_MARGIN`, is strictly greater than half of the context window.
 *
 * @param msg - The message to check.
 * @param contextWindow - Context window size to compare against.
 * @returns `true` when the padded estimate exceeds 50% of `contextWindow`.
 */
export function isOversizedForSummary(msg: AgentMessage, contextWindow: number): boolean {
  const paddedEstimate = estimateCompactionMessageTokens(msg) * SAFETY_MARGIN;
  const halfWindow = contextWindow * 0.5;
  return paddedEstimate > halfWindow;
}
if (messages.length === 0) { return params.previousSummary ?? DEFAULT_SUMMARY_FALLBACK;
}
// Try full summarization first try { return await summarizeChunks(params);
} catch (fullError) {
log.warn(`Full summarization failed: ${formatErrorMessage(fullError)}`);
}
// Fallback 1: Summarize only small messages, note oversized ones const smallMessages: AgentMessage[] = []; const oversizedNotes: string[] = [];
for (const msg of messages) { if (isOversizedForSummary(msg, contextWindow)) { const role = (msg as { role?: string }).role ?? "message"; const tokens = estimateCompactionMessageTokens(msg);
oversizedNotes.push(
`[Large ${role} (~${Math.round(tokens / 1000)}K tokens) omitted from summary]`,
);
} else {
smallMessages.push(msg);
}
}
// When nothing was oversized, `smallMessages` is the same transcript as the full attempt. // Re-summarizing it would duplicate the same failing API work (and duplicate warn logs). if (smallMessages.length > 0 && smallMessages.length !== messages.length) { try { const partialSummary = await summarizeChunks({
...params,
messages: smallMessages,
}); const notes = oversizedNotes.length > 0 ? `\n\n${oversizedNotes.join("\n")}` : ""; return partialSummary + notes;
} catch (partialError) {
log.warn(`Partial summarization also failed: ${formatErrorMessage(partialError)}`);
}
}
// Final fallback: Just note what was there return (
`Context contained ${messages.length} messages (${oversizedNotes.length} oversized). ` +
`Summary unavailable due to size limits.`
);
}
// After dropping a chunk, repair tool_use/tool_result pairing to handle // orphaned tool_results (whose tool_use was in the dropped chunk). // repairToolUseResultPairing drops orphaned tool_results, preventing // "unexpected tool_use_id" errors from Anthropic's API. const repairReport = repairToolUseResultPairing(flatRest); const repairedKept = repairReport.messages;
// Track orphaned tool_results as dropped (they were in kept but their tool_use was dropped) const orphanedCount = repairReport.droppedOrphanCount;
droppedChunks += 1;
droppedMessages += dropped.length + orphanedCount;
droppedTokens += estimateMessagesTokens(dropped); // Note: We don't have the actual orphaned messages to add to droppedMessagesList // since repairToolUseResultPairing doesn't return them. This is acceptable since // the dropped messages are used for summarization, and orphaned tool_results // without their tool_use context aren't useful for summarization anyway.
allDroppedMessages.push(...dropped);
keptMessages = repairedKept;
}
/*
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */