import fs from
"node:fs/promises" ;
import os from
"node:os" ;
import path from
"node:path" ;
import { afterEach, beforeEach, describe, expect, it, vi } from
"vitest" ;
/**
 * Mock handles shared by the `vi.mock` factories below and by the tests.
 * They are created inside `vi.hoisted` so that they already exist when the
 * mock factories reference them.
 */
const {
  runQaManualLane,
  runQaSuiteFromRuntime,
  runQaCharacterEval,
  runQaMultipass,
  runTelegramQaLive,
  startQaLabServer,
  writeQaDockerHarnessFiles,
  buildQaDockerHarnessImage,
  runQaDockerUp,
  defaultQaRuntimeModelForMode,
} = vi.hoisted(() => {
  // Signature of the runtime model resolver that the typed mock stands in for.
  type RuntimeModelResolver = (mode: string, options?: { alternate?: boolean }) => string;

  return {
    runQaManualLane: vi.fn(),
    runQaSuiteFromRuntime: vi.fn(),
    runQaCharacterEval: vi.fn(),
    runQaMultipass: vi.fn(),
    runTelegramQaLive: vi.fn(),
    startQaLabServer: vi.fn(),
    writeQaDockerHarnessFiles: vi.fn(),
    buildQaDockerHarnessImage: vi.fn(),
    runQaDockerUp: vi.fn(),
    defaultQaRuntimeModelForMode: vi.fn<RuntimeModelResolver>(),
  };
});
// Replace every runtime module the CLI dispatches to with the hoisted mock handles,
// so the tests only observe how the CLI layer calls them.
vi.mock("./manual-lane.runtime.js", () => {
  return { runQaManualLane };
});
vi.mock("./suite-launch.runtime.js", () => {
  return { runQaSuiteFromRuntime };
});
vi.mock("./character-eval.js", () => {
  return { runQaCharacterEval };
});
vi.mock("./multipass.runtime.js", () => {
  return { runQaMultipass };
});
vi.mock("./live-transports/telegram/telegram-live.runtime.js", () => {
  return { runTelegramQaLive };
});
vi.mock("./lab-server.js", () => {
  return { startQaLabServer };
});
vi.mock("./docker-harness.js", () => {
  return { writeQaDockerHarnessFiles, buildQaDockerHarnessImage };
});
vi.mock("./docker-up.runtime.js", () => {
  return { runQaDockerUp };
});
vi.mock("./model-selection.runtime.js", () => {
  return { defaultQaRuntimeModelForMode };
});
import { resolveRepoRelativeOutputDir } from
"./cli-paths.js" ;
import {
runQaLabSelfCheckCommand,
runQaDockerBuildImageCommand,
runQaDockerScaffoldCommand,
runQaDockerUpCommand,
runQaCharacterEvalCommand,
runQaCoverageReportCommand,
runQaManualLaneCommand,
runQaParityReportCommand,
runQaSuiteCommand,
} from
"./cli.runtime.js" ;
import { runQaTelegramCommand } from
"./live-transports/telegram/cli.runtime.js" ;
import { defaultQaModelForMode as defaultQaProviderModelForMode } from
"./model-selection.js" ;
import type { QaProviderModeInput } from
"./run-config.js" ;
describe(
"qa cli runtime" , () => {
let stdoutWrite: ReturnType<
typeof vi.spyOn>;
let stderrWrite: ReturnType<
typeof vi.spyOn>;
let suiteArtifactsDir: string;
let suiteReportPath: string;
let suiteSummaryPath: string;
beforeEach(async () => {
  // Fresh artifacts directory per test, seeded with a passing report and summary.
  suiteArtifactsDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-suite-runtime-"));
  suiteReportPath = path.join(suiteArtifactsDir, "qa-suite-report.md");
  suiteSummaryPath = path.join(suiteArtifactsDir, "qa-suite-summary.json");
  await fs.writeFile(suiteReportPath, "# QA Suite Report\n", "utf8");
  await fs.writeFile(
    suiteSummaryPath,
    JSON.stringify({
      counts: {
        total: 1,
        passed: 1,
        failed: 0,
      },
      scenarios: [],
    }),
    "utf8",
  );

  // Silence and capture CLI output so tests can assert on what was printed.
  stdoutWrite = vi.spyOn(process.stdout, "write").mockReturnValue(true);
  stderrWrite = vi.spyOn(process.stderr, "write").mockReturnValue(true);

  // Drop any implementations or queued results left over from the previous test.
  runQaSuiteFromRuntime.mockReset();
  runQaCharacterEval.mockReset();
  runQaManualLane.mockReset();
  runQaMultipass.mockReset();
  runTelegramQaLive.mockReset();
  startQaLabServer.mockReset();
  writeQaDockerHarnessFiles.mockReset();
  buildQaDockerHarnessImage.mockReset();
  runQaDockerUp.mockReset();

  // By default the runtime resolver mock delegates to the real provider defaults.
  defaultQaRuntimeModelForMode.mockImplementation(
    (mode: string, options?: { alternate?: boolean }) =>
      defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
  );

  // Default results for each mocked runtime. The URL fixtures previously carried a
  // trailing space inside the string literal; they are now well-formed URLs.
  runQaSuiteFromRuntime.mockResolvedValue({
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [],
  });
  runQaCharacterEval.mockResolvedValue({
    reportPath: "/tmp/character-report.md",
    summaryPath: "/tmp/character-summary.json",
  });
  runQaManualLane.mockResolvedValue({
    model: "openai/gpt-5.4",
    waited: { status: "ok" },
    reply: "done",
    watchUrl: "http://127.0.0.1:43124",
  });
  runQaMultipass.mockResolvedValue({
    outputDir: "/tmp/multipass",
    reportPath: "/tmp/multipass/qa-suite-report.md",
    summaryPath: "/tmp/multipass/qa-suite-summary.json",
    hostLogPath: "/tmp/multipass/multipass-host.log",
    bootstrapLogPath: "/tmp/multipass/multipass-guest-bootstrap.log",
    guestScriptPath: "/tmp/multipass/multipass-guest-run.sh",
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });
  runTelegramQaLive.mockResolvedValue({
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [],
  });
  startQaLabServer.mockResolvedValue({
    baseUrl: "http://127.0.0.1:58000",
    runSelfCheck: vi.fn().mockResolvedValue({
      outputPath: "/tmp/report.md",
    }),
    stop: vi.fn(),
  });
  writeQaDockerHarnessFiles.mockResolvedValue({
    outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker",
  });
  buildQaDockerHarnessImage.mockResolvedValue({
    imageName: "openclaw:qa-local-prebaked",
  });
  runQaDockerUp.mockResolvedValue({
    outputDir: "/tmp/openclaw-repo/.artifacts/qa-docker",
    qaLabUrl: "http://127.0.0.1:43124",
    gatewayUrl: "http://127.0.0.1:18789/",
    stopCommand: "docker compose down",
  });
});
afterEach(async () => {
  // Give stdout/stderr back, wipe recorded mock calls, then remove the temp artifacts.
  for (const outputSpy of [stdoutWrite, stderrWrite]) {
    outputSpy.mockRestore();
  }
  vi.clearAllMocks();
  const removal = { recursive: true, force: true };
  await fs.rm(suiteArtifactsDir, removal);
});
it("resolves suite repo-root-relative paths before dispatching", async () => {
  // The runtime is expected to receive absolute paths anchored at the repo root.
  const repoRoot = "/tmp/openclaw-repo";
  const relativeOutputDir = ".artifacts/qa/frontier";

  await runQaSuiteCommand({
    repoRoot,
    outputDir: relativeOutputDir,
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "anthropic/claude-sonnet-4-6",
    fastMode: true,
    thinking: "medium",
    scenarioIds: ["approval-turn-tool-followthrough"],
  });

  const expectedRuntimeArgs = {
    repoRoot: path.resolve(repoRoot),
    outputDir: path.resolve(repoRoot, relativeOutputDir),
    transportId: "qa-channel",
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "anthropic/claude-sonnet-4-6",
    fastMode: true,
    thinkingDefault: "medium",
    scenarioIds: ["approval-turn-tool-followthrough"],
  };
  expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(expectedRuntimeArgs);
});
it("resolves telegram qa repo-root-relative paths before dispatching", async () => {
  // Same path anchoring as the suite command, but for the telegram live lane.
  const repoRoot = "/tmp/openclaw-repo";
  const relativeOutputDir = ".artifacts/qa/telegram";

  await runQaTelegramCommand({
    repoRoot,
    outputDir: relativeOutputDir,
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "openai/gpt-5.4",
    fastMode: true,
    scenarioIds: ["telegram-help-command"],
    sutAccountId: "sut-live",
  });

  const expectedLiveArgs = {
    repoRoot: path.resolve(repoRoot),
    outputDir: path.resolve(repoRoot, relativeOutputDir),
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "openai/gpt-5.4",
    fastMode: true,
    allowFailures: undefined,
    scenarioIds: ["telegram-help-command"],
    sutAccountId: "sut-live",
  };
  expect(runTelegramQaLive).toHaveBeenCalledWith(expectedLiveArgs);
});
it("rejects output dirs that escape the repo root", () => {
  const repoRoot = "/tmp/openclaw-repo";

  // A relative path that climbs out of the repo root is rejected.
  // (The matcher name was previously split across two lines, which is a syntax error.)
  expect(() => resolveRepoRelativeOutputDir(repoRoot, "../outside")).toThrow(
    "--output-dir must stay within the repo root.",
  );

  // An absolute path is rejected with its own message.
  expect(() => resolveRepoRelativeOutputDir(repoRoot, "/tmp/outside")).toThrow(
    "--output-dir must be a relative path inside the repo root.",
  );
});
it("defaults telegram qa runs onto the live provider lane", async () => {
  // No providerMode is passed; the live runner should still see "live-frontier".
  const repoRoot = "/tmp/openclaw-repo";
  await runQaTelegramCommand({ repoRoot, scenarioIds: ["telegram-help-command"] });

  const expectedSubset = expect.objectContaining({
    repoRoot: path.resolve(repoRoot),
    providerMode: "live-frontier",
    allowFailures: undefined,
  });
  expect(runTelegramQaLive).toHaveBeenCalledWith(expectedSubset);
});
it("sets a failing exit code when telegram scenarios fail", async () => {
  const failedScenario = {
    id: "telegram-help-command",
    title: "Telegram help command reply",
    status: "fail",
    details: "missing expected text",
  };
  // Start from a clean exit code and restore the original afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  runTelegramQaLive.mockResolvedValueOnce({
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [failedScenario],
  });
  try {
    await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo" });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = savedExitCode;
  }
});
it("keeps telegram exit code clear when --allow-failures is set", async () => {
  const failedScenario = {
    id: "telegram-help-command",
    title: "Telegram help command reply",
    status: "fail",
    details: "missing expected text",
  };
  // Start from a clean exit code and restore the original afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  runTelegramQaLive.mockResolvedValueOnce({
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [failedScenario],
  });
  try {
    await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", allowFailures: true });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = savedExitCode;
  }
});
it("passes host suite concurrency through", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const workerCount = 3;

  await runQaSuiteCommand({
    repoRoot,
    scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
    concurrency: workerCount,
  });

  // The concurrency value should reach the runtime untouched.
  const expectedSubset = expect.objectContaining({
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
    concurrency: workerCount,
  });
  expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(expectedSubset);
});
it("sets a failing exit code when host suite scenarios fail", async () => {
  // Start from a clean exit code and restore the original afterwards.
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;

  // Overwrite the seeded summary with one failed scenario.
  await fs.writeFile(
    suiteSummaryPath,
    JSON.stringify({
      counts: {
        total: 1,
        passed: 0,
        failed: 1,
      },
      scenarios: [{ name: "channel chat baseline", status: "fail" }],
    }),
    "utf8",
  );
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    // Fixture URL no longer carries a stray trailing space.
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [
      {
        name: "channel chat baseline",
        status: "fail",
        steps: [],
      },
    ],
  });
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
    });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = priorExitCode;
  }
});
it("keeps host suite exit code clear when --allow-failures is set", async () => {
  // Start from a clean exit code and restore the original afterwards.
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;

  // Overwrite the seeded summary with one failed scenario.
  await fs.writeFile(
    suiteSummaryPath,
    JSON.stringify({
      counts: {
        total: 1,
        passed: 0,
        failed: 1,
      },
      scenarios: [{ name: "channel chat baseline", status: "fail" }],
    }),
    "utf8",
  );
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    // Fixture URL no longer carries a stray trailing space.
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [
      {
        name: "channel chat baseline",
        status: "fail",
        steps: [],
      },
    ],
  });
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = priorExitCode;
  }
});
it("retries host suite runs once for retryable infra failures", async () => {
  // First attempt rejects with an agent.wait timeout; the second attempt succeeds.
  runQaSuiteFromRuntime
    .mockRejectedValueOnce(new Error("agent.wait timeout while waiting for transport ready"))
    .mockResolvedValueOnce({
      // Fixture URL no longer carries a stray trailing space.
      watchUrl: "http://127.0.0.1:43124",
      reportPath: suiteReportPath,
      summaryPath: suiteSummaryPath,
      scenarios: [],
    });

  await runQaSuiteCommand({
    repoRoot: "/tmp/openclaw-repo",
  });

  expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2);
  expect(stderrWrite).toHaveBeenCalledWith(
    expect.stringContaining("[qa-suite] infra retry 1/1: agent.wait timeout"),
  );
});
it("retries host suite runs once for qa-channel readiness timeouts", async () => {
  // First attempt rejects with a qa-channel readiness timeout; the second attempt succeeds.
  runQaSuiteFromRuntime
    .mockRejectedValueOnce(
      new Error(
        "timed out after 180000ms waiting for qa-channel ready; last status: no qa-channel accounts reported",
      ),
    )
    .mockResolvedValueOnce({
      // Fixture URL no longer carries a stray trailing space.
      watchUrl: "http://127.0.0.1:43124",
      reportPath: suiteReportPath,
      summaryPath: suiteSummaryPath,
      scenarios: [],
    });

  await runQaSuiteCommand({
    repoRoot: "/tmp/openclaw-repo",
  });

  expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(2);
  expect(stderrWrite).toHaveBeenCalledWith(
    expect.stringContaining(
      "[qa-suite] infra retry 1/1: timed out after 180000ms waiting for qa-channel ready",
    ),
  );
});
it("does not retry host suite runs for generic timeout wording", async () => {
  // A scenario-level timeout message should surface directly, with a single attempt.
  const scenarioTimeout = new Error("approval-turn timed out waiting for post-approval read");
  runQaSuiteFromRuntime.mockRejectedValueOnce(scenarioTimeout);

  const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo" });
  await expect(pendingRun).rejects.toThrow(
    "approval-turn timed out waiting for post-approval read",
  );

  expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1);
});
it("does not retry host suite runs for semantic failures", async () => {
  // Start from a clean exit code and restore the original afterwards.
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;

  // Overwrite the seeded summary with one failed scenario.
  await fs.writeFile(
    suiteSummaryPath,
    JSON.stringify({
      counts: {
        total: 1,
        passed: 0,
        failed: 1,
      },
      scenarios: [{ name: "channel chat baseline", status: "fail" }],
    }),
    "utf8",
  );
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    // Fixture URL no longer carries a stray trailing space.
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [
      {
        name: "channel chat baseline",
        status: "fail",
        steps: [],
      },
    ],
  });
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
    });
    // A resolved run with failing scenarios gets one attempt and a failing exit code.
    expect(runQaSuiteFromRuntime).toHaveBeenCalledTimes(1);
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = priorExitCode;
  }
});
it("runs a host-only parity preflight against the sentinel scenario", async () => {
  const repoRoot = "/tmp/openclaw-repo";

  await runQaSuiteCommand({
    repoRoot,
    providerMode: "mock-openai",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "anthropic/claude-opus-4-6",
    preflight: true,
  });

  // Preflight is expected to run one scenario, serially, into a preflight output dir.
  const preflightOutputDir = expect.stringMatching(
    /^\/tmp\/openclaw-repo\/\.artifacts\/qa-e2e\/preflight\/suite-/,
  );
  const expectedRuntimeArgs = {
    repoRoot: path.resolve(repoRoot),
    outputDir: preflightOutputDir,
    transportId: "qa-channel",
    providerMode: "mock-openai",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "anthropic/claude-opus-4-6",
    scenarioIds: ["approval-turn-tool-followthrough"],
    concurrency: 1,
  };
  expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(expectedRuntimeArgs);

  const summaryBanner = expect.stringContaining("QA parity preflight summary:");
  expect(stdoutWrite).toHaveBeenCalledWith(summaryBanner);
});
it("throws when parity preflight finds a failing sentinel scenario", async () => {
  // Overwrite the seeded summary with a failed sentinel scenario.
  await fs.writeFile(
    suiteSummaryPath,
    JSON.stringify({
      counts: {
        total: 1,
        passed: 0,
        failed: 1,
      },
      scenarios: [{ name: "approval turn tool followthrough", status: "fail" }],
    }),
    "utf8",
  );
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    // Fixture URL no longer carries a stray trailing space.
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [{ name: "approval turn tool followthrough", status: "fail", steps: [] }],
  });

  await expect(
    runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      preflight: true,
    }),
  ).rejects.toThrow("QA parity preflight failed with 1 failing scenario.");
});
it("keeps parity preflight exit code clear when --allow-failures is set", async () => {
  // Start from a clean exit code and restore the original afterwards.
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;

  // Overwrite the seeded summary with a failed sentinel scenario.
  await fs.writeFile(
    suiteSummaryPath,
    JSON.stringify({
      counts: {
        total: 1,
        passed: 0,
        failed: 1,
      },
      scenarios: [{ name: "approval turn tool followthrough", status: "fail" }],
    }),
    "utf8",
  );
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    // Fixture URL no longer carries a stray trailing space.
    watchUrl: "http://127.0.0.1:43124",
    reportPath: suiteReportPath,
    summaryPath: suiteSummaryPath,
    scenarios: [{ name: "approval turn tool followthrough", status: "fail", steps: [] }],
  });
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      preflight: true,
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = priorExitCode;
  }
});
it("rejects preflight on the multipass runner", async () => {
  // Combining --preflight with the multipass runner should be refused.
  const pendingRun = runQaSuiteCommand({
    repoRoot: "/tmp/openclaw-repo",
    runner: "multipass",
    preflight: true,
  });
  await expect(pendingRun).rejects.toThrow("--preflight requires --runner host.");
});
it("passes host suite CLI auth mode through", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const claudeCliModel = "claude-cli/claude-sonnet-4-6";

  await runQaSuiteCommand({
    repoRoot,
    providerMode: "live-frontier",
    primaryModel: claudeCliModel,
    alternateModel: claudeCliModel,
    cliAuthMode: "subscription",
    scenarioIds: ["claude-cli-provider-capabilities-subscription"],
  });

  // The CLI flag is expected to reach the runtime as `claudeCliAuthMode`.
  const expectedSubset = expect.objectContaining({
    repoRoot: path.resolve(repoRoot),
    providerMode: "live-frontier",
    primaryModel: claudeCliModel,
    alternateModel: claudeCliModel,
    claudeCliAuthMode: "subscription",
    scenarioIds: ["claude-cli-provider-capabilities-subscription"],
  });
  expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(expectedSubset);
});
it("expands the agentic parity pack onto the suite scenario list", async () => {
  const repoRoot = "/tmp/openclaw-repo";

  await runQaSuiteCommand({
    repoRoot,
    parityPack: "agentic",
    scenarioIds: ["channel-chat-baseline"],
  });

  // The explicitly requested scenario comes first, followed by the pack's scenarios.
  const expectedScenarioIds = [
    "channel-chat-baseline",
    "approval-turn-tool-followthrough",
    "model-switch-tool-continuity",
    "source-docs-discovery-report",
    "image-understanding-attachment",
    "compaction-retry-mutating-tool",
    "subagent-handoff",
    "subagent-fanout-synthesis",
    "subagent-stale-child-links",
    "memory-recall",
    "thread-memory-isolation",
    "config-restart-capability-flip",
    "instruction-followthrough-repo-contract",
  ];
  expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
    expect.objectContaining({
      repoRoot: path.resolve(repoRoot),
      scenarioIds: expectedScenarioIds,
    }),
  );
});
it("rejects unknown suite CLI auth modes", async () => {
  // "magic" is not one of the modes listed in the expected error message.
  const pendingRun = runQaSuiteCommand({
    repoRoot: "/tmp/openclaw-repo",
    cliAuthMode: "magic",
  });
  await expect(pendingRun).rejects.toThrow(
    "--cli-auth-mode must be one of auto, api-key, subscription",
  );
});
it("sets a failing exit code when the parity gate fails", async () => {
  const tempRepoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-parity-"));
  // Start from a clean exit code and restore the original afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  try {
    // Candidate and baseline summaries have identical content: one passing scenario.
    for (const summaryFile of ["candidate.json", "baseline.json"]) {
      await fs.writeFile(
        path.join(tempRepoRoot, summaryFile),
        JSON.stringify({
          scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
        }),
        "utf8",
      );
    }
    await runQaParityReportCommand({
      repoRoot: tempRepoRoot,
      candidateSummary: "candidate.json",
      baselineSummary: "baseline.json",
    });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = savedExitCode;
    await fs.rm(tempRepoRoot, { recursive: true, force: true });
  }
});
it("prints a markdown coverage report from scenario metadata", async () => {
  // Runs against the current working directory rather than a fake repo root.
  await runQaCoverageReportCommand({ repoRoot: process.cwd() });

  for (const expectedFragment of ["# QA Coverage Inventory", "memory.recall"]) {
    expect(stdoutWrite).toHaveBeenCalledWith(expect.stringContaining(expectedFragment));
  }
});
it("resolves character eval paths and passes model refs through", async () => {
  // `180_000` was previously written as `180 _000`, which is not a valid numeric literal.
  await runQaCharacterEvalCommand({
    repoRoot: "/tmp/openclaw-repo",
    outputDir: ".artifacts/qa/character",
    model: [
      "openai/gpt-5.4,thinking=xhigh,fast=false",
      "codex-cli/test-model,thinking=high,fast",
    ],
    scenario: "character-vibes-gollum",
    fast: true,
    thinking: "medium",
    modelThinking: ["codex-cli/test-model=medium"],
    judgeModel: ["openai/gpt-5.4,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
    judgeTimeoutMs: 180_000,
    blindJudgeModels: true,
    concurrency: 4,
    judgeConcurrency: 3,
  });

  // Inline `model,option` specs are expected to be split into bare model refs plus
  // per-model option maps, and the output dir resolved against the repo root.
  expect(runQaCharacterEval).toHaveBeenCalledWith({
    repoRoot: path.resolve("/tmp/openclaw-repo"),
    outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa/character"),
    models: ["openai/gpt-5.4", "codex-cli/test-model"],
    scenarioId: "character-vibes-gollum",
    candidateFastMode: true,
    candidateThinkingDefault: "medium",
    candidateThinkingByModel: { "codex-cli/test-model": "medium" },
    candidateModelOptions: {
      "openai/gpt-5.4": { thinkingDefault: "xhigh", fastMode: false },
      "codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
    },
    judgeModels: ["openai/gpt-5.4", "anthropic/claude-opus-4-6"],
    judgeModelOptions: {
      "openai/gpt-5.4": { thinkingDefault: "xhigh", fastMode: true },
      "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
    },
    judgeTimeoutMs: 180_000,
    judgeBlindModels: true,
    candidateConcurrency: 4,
    judgeConcurrency: 3,
    progress: expect.any(Function),
  });
});
it("lets character eval auto-select candidate fast mode when --fast is omitted", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  await runQaCharacterEvalCommand({ repoRoot, model: ["openai/gpt-5.4"] });

  // Every option that was not supplied is expected to be forwarded as undefined.
  const expectedEvalArgs = {
    repoRoot: path.resolve(repoRoot),
    outputDir: undefined,
    models: ["openai/gpt-5.4"],
    scenarioId: undefined,
    candidateFastMode: undefined,
    candidateThinkingDefault: undefined,
    candidateThinkingByModel: undefined,
    candidateModelOptions: undefined,
    judgeModels: undefined,
    judgeModelOptions: undefined,
    judgeTimeoutMs: undefined,
    judgeBlindModels: undefined,
    candidateConcurrency: undefined,
    judgeConcurrency: undefined,
    progress: expect.any(Function),
  };
  expect(runQaCharacterEval).toHaveBeenCalledWith(expectedEvalArgs);
});
it("rejects invalid character eval thinking levels", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  // Runs the command with the given options and asserts on the rejection message.
  const expectRejection = (
    options: Parameters<typeof runQaCharacterEvalCommand>[0],
    message: string,
  ) => expect(runQaCharacterEvalCommand(options)).rejects.toThrow(message);

  // Unknown global thinking level.
  await expectRejection(
    { repoRoot, model: ["openai/gpt-5.4"], thinking: "enormous" },
    "--thinking must be one of",
  );
  // Unknown thinking level in an inline model option.
  await expectRejection(
    { repoRoot, model: ["openai/gpt-5.4,thinking=galaxy"] },
    "--model thinking must be one of",
  );
  // Unrecognised inline model option.
  await expectRejection(
    { repoRoot, model: ["openai/gpt-5.4,warp"] },
    "--model options must be thinking=<level>",
  );
  // --model-thinking entry without the `=level` part.
  await expectRejection(
    { repoRoot, model: ["openai/gpt-5.4"], modelThinking: ["openai/gpt-5.4"] },
    "--model-thinking must use provider/model=level",
  );
});
it("passes the explicit repo root into manual runs", async () => {
  // `45_000` was previously written as `45 _000`, which is not a valid numeric literal.
  await runQaManualLaneCommand({
    repoRoot: "/tmp/openclaw-repo",
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "openai/gpt-5.4",
    fastMode: true,
    message: "read qa kickoff and reply short",
    timeoutMs: 45_000,
  });

  // The manual lane is expected to receive the resolved repo root and the qa-channel transport.
  expect(runQaManualLane).toHaveBeenCalledWith({
    repoRoot: path.resolve("/tmp/openclaw-repo"),
    transportId: "qa-channel",
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "openai/gpt-5.4",
    fastMode: true,
    message: "read qa kickoff and reply short",
    timeoutMs: 45_000,
  });
});
it("routes suite runs through multipass when the runner is selected", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const relativeOutputDir = ".artifacts/qa-multipass";

  await runQaSuiteCommand({
    repoRoot,
    outputDir: relativeOutputDir,
    runner: "multipass",
    providerMode: "mock-openai",
    scenarioIds: ["channel-chat-baseline"],
    allowFailures: true,
    concurrency: 3,
    image: "lts",
    cpus: 2,
    memory: "4G",
    disk: "24G",
  });

  // The VM sizing flags are expected to reach the multipass runner unchanged.
  const expectedMultipassArgs = {
    repoRoot: path.resolve(repoRoot),
    outputDir: path.resolve(repoRoot, relativeOutputDir),
    transportId: "qa-channel",
    providerMode: "mock-openai",
    primaryModel: undefined,
    alternateModel: undefined,
    fastMode: undefined,
    allowFailures: true,
    scenarioIds: ["channel-chat-baseline"],
    concurrency: 3,
    image: "lts",
    cpus: 2,
    memory: "4G",
    disk: "24G",
  };
  expect(runQaMultipass).toHaveBeenCalledWith(expectedMultipassArgs);
  // The host runtime must not be used when multipass is selected.
  expect(runQaSuiteFromRuntime).not.toHaveBeenCalled();
});
it("passes live suite selection through to the multipass runner", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const liveModel = "openai/gpt-5.4";

  await runQaSuiteCommand({
    repoRoot,
    runner: "multipass",
    providerMode: "live-frontier",
    primaryModel: liveModel,
    alternateModel: liveModel,
    fastMode: true,
    allowFailures: true,
    scenarioIds: ["channel-chat-baseline"],
  });

  // Provider mode, models and flags should arrive at the multipass runner as given.
  const expectedSubset = expect.objectContaining({
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    providerMode: "live-frontier",
    primaryModel: liveModel,
    alternateModel: liveModel,
    fastMode: true,
    allowFailures: true,
    scenarioIds: ["channel-chat-baseline"],
  });
  expect(runQaMultipass).toHaveBeenCalledWith(expectedSubset);
});
it("sets a failing exit code when multipass summary reports failed scenarios", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempDir = (fileName: string) => path.join(tempDir, fileName);
  const summaryPath = inTempDir("qa-suite-summary.json");

  // The summary has counts only (no scenarios array), with one failure.
  const summaryWithFailure = { counts: { total: 2, passed: 1, failed: 1 } };
  await fs.writeFile(summaryPath, JSON.stringify(summaryWithFailure), "utf8");
  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempDir,
    reportPath: inTempDir("qa-suite-report.md"),
    summaryPath,
    hostLogPath: inTempDir("multipass-host.log"),
    bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempDir("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  // Start from a clean exit code and restore the original afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  try {
    await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = savedExitCode;
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("rejects malformed multipass summary JSON", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempDir = (fileName: string) => path.join(tempDir, fileName);
  const summaryPath = inTempDir("qa-suite-summary.json");

  // The summary file exists but its content is not valid JSON.
  await fs.writeFile(summaryPath, "{not-json", "utf8");
  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempDir,
    reportPath: inTempDir("qa-suite-report.md"),
    summaryPath,
    hostLogPath: inTempDir("multipass-host.log"),
    bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempDir("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  try {
    const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    await expect(pendingRun).rejects.toThrow("Could not parse QA summary JSON");
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("rejects unreadable multipass summary JSON with read/parse wording", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempDir = (fileName: string) => path.join(tempDir, fileName);
  // The summary file is deliberately never written, so reading it must fail.
  const summaryPath = inTempDir("qa-suite-summary.json");

  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempDir,
    reportPath: inTempDir("qa-suite-report.md"),
    summaryPath,
    hostLogPath: inTempDir("multipass-host.log"),
    bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempDir("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  try {
    const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    await expect(pendingRun).rejects.toThrow("Could not read QA summary JSON");
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("rejects partial multipass summary JSON without failure fields", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempDir = (fileName: string) => path.join(tempDir, fileName);
  const summaryPath = inTempDir("qa-suite-summary.json");

  // Valid JSON, but with neither counts.failed nor a scenarios array.
  const partialSummary = { counts: { total: 2, passed: 2 } };
  await fs.writeFile(summaryPath, JSON.stringify(partialSummary), "utf8");
  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempDir,
    reportPath: inTempDir("qa-suite-report.md"),
    summaryPath,
    hostLogPath: inTempDir("multipass-host.log"),
    bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempDir("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  try {
    const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    await expect(pendingRun).rejects.toThrow(
      "did not include counts.failed or scenarios[].status",
    );
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("keeps multipass exit code clear when --allow-failures is set", async () => {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempDir = (fileName: string) => path.join(tempDir, fileName);
  const summaryPath = inTempDir("qa-suite-summary.json");

  // The summary has counts only (no scenarios array), with one failure.
  const summaryWithFailure = { counts: { total: 2, passed: 1, failed: 1 } };
  await fs.writeFile(summaryPath, JSON.stringify(summaryWithFailure), "utf8");
  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempDir,
    reportPath: inTempDir("qa-suite-report.md"),
    summaryPath,
    hostLogPath: inTempDir("multipass-host.log"),
    bootstrapLogPath: inTempDir("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempDir("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  // Start from a clean exit code and restore the original afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      runner: "multipass",
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = savedExitCode;
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("passes provider-qualified mock parity suite selection through to the host runner", async () => {
  const repoRoot = "/tmp/openclaw-repo";

  await runQaSuiteCommand({
    repoRoot,
    providerMode: "mock-openai",
    parityPack: "agentic",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "anthropic/claude-opus-4-6",
  });

  // With no explicit scenario ids, only the agentic pack's scenarios are expected.
  const agenticPackScenarioIds = [
    "approval-turn-tool-followthrough",
    "model-switch-tool-continuity",
    "source-docs-discovery-report",
    "image-understanding-attachment",
    "compaction-retry-mutating-tool",
    "subagent-handoff",
    "subagent-fanout-synthesis",
    "subagent-stale-child-links",
    "memory-recall",
    "thread-memory-isolation",
    "config-restart-capability-flip",
    "instruction-followthrough-repo-contract",
  ];
  expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
    repoRoot: path.resolve(repoRoot),
    outputDir: undefined,
    transportId: "qa-channel",
    providerMode: "mock-openai",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "anthropic/claude-opus-4-6",
    fastMode: undefined,
    scenarioIds: agenticPackScenarioIds,
  });
});
it("rejects multipass-only suite flags on the host runner", async () => {
  // --image is a VM option, so combining it with the host runner should be refused.
  const pendingRun = runQaSuiteCommand({
    repoRoot: "/tmp/openclaw-repo",
    runner: "host",
    image: "lts",
  });
  await expect(pendingRun).rejects.toThrow(
    "--image, --cpus, --memory, and --disk require --runner multipass.",
  );
});
it("defaults manual mock runs onto the mock-openai model lane", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const prompt = "read qa kickoff and reply short";

  await runQaManualLaneCommand({ repoRoot, providerMode: "mock-openai", message: prompt });

  // No models were supplied, so the mock-openai defaults are expected.
  const expectedLaneArgs = {
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    providerMode: "mock-openai",
    primaryModel: "mock-openai/gpt-5.4",
    alternateModel: "mock-openai/gpt-5.4-alt",
    fastMode: undefined,
    message: prompt,
    timeoutMs: undefined,
  };
  expect(runQaManualLane).toHaveBeenCalledWith(expectedLaneArgs);
});
it("defaults manual aimock runs onto the aimock model lane", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const prompt = "read qa kickoff and reply short";

  await runQaManualLaneCommand({ repoRoot, providerMode: "aimock", message: prompt });

  // No models were supplied, so the aimock defaults are expected.
  const expectedLaneArgs = {
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    providerMode: "aimock",
    primaryModel: "aimock/gpt-5.4",
    alternateModel: "aimock/gpt-5.4-alt",
    fastMode: undefined,
    message: prompt,
    timeoutMs: undefined,
  };
  expect(runQaManualLane).toHaveBeenCalledWith(expectedLaneArgs);
});
it("defaults manual frontier runs onto the frontier model lane", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const prompt = "read qa kickoff and reply short";

  // Neither providerMode nor models are supplied here.
  await runQaManualLaneCommand({ repoRoot, message: prompt });

  const expectedLaneArgs = {
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "openai/gpt-5.4",
    fastMode: undefined,
    message: prompt,
    timeoutMs: undefined,
  };
  expect(runQaManualLane).toHaveBeenCalledWith(expectedLaneArgs);
});
it("keeps an explicit manual primary model as the alternate default", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const prompt = "read qa kickoff and reply short";
  const explicitPrimary = "anthropic/claude-sonnet-4-6";

  await runQaManualLaneCommand({
    repoRoot,
    providerMode: "live-frontier",
    primaryModel: explicitPrimary,
    message: prompt,
  });

  // With no alternate supplied, the explicit primary is expected to be reused for it.
  const expectedLaneArgs = {
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    providerMode: "live-frontier",
    primaryModel: explicitPrimary,
    alternateModel: explicitPrimary,
    fastMode: undefined,
    message: prompt,
    timeoutMs: undefined,
  };
  expect(runQaManualLane).toHaveBeenCalledWith(expectedLaneArgs);
});
it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const prompt = "read qa kickoff and reply short";

  // Override the runtime resolver for live-frontier only; other modes keep provider defaults.
  defaultQaRuntimeModelForMode.mockImplementation((mode, options) => {
    if (mode === "live-frontier") {
      return "openai/gpt-5.4";
    }
    return defaultQaProviderModelForMode(mode as QaProviderModeInput, options);
  });

  await runQaManualLaneCommand({ repoRoot, message: prompt });

  const expectedLaneArgs = {
    repoRoot: path.resolve(repoRoot),
    transportId: "qa-channel",
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.4",
    alternateModel: "openai/gpt-5.4",
    fastMode: undefined,
    message: prompt,
    timeoutMs: undefined,
  };
  expect(runQaManualLane).toHaveBeenCalledWith(expectedLaneArgs);
});
it("resolves self-check repo-root-relative paths before starting the lab server", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const relativeReport = ".artifacts/qa/self-check.md";

  await runQaLabSelfCheckCommand({ repoRoot, output: relativeReport });

  // The lab server should be started with absolute repo root and output paths.
  const expectedServerArgs = {
    repoRoot: path.resolve(repoRoot),
    outputPath: path.resolve(repoRoot, relativeReport),
  };
  expect(startQaLabServer).toHaveBeenCalledWith(expectedServerArgs);
});
it("resolves docker scaffold paths relative to the explicit repo root", async () => {
  // The provider URL previously carried a trailing space inside the literal, in both
  // the input and the expectation; a single well-formed constant is used for both.
  const providerBaseUrl = "http://127.0.0.1:44080/v1";

  await runQaDockerScaffoldCommand({
    repoRoot: "/tmp/openclaw-repo",
    outputDir: ".artifacts/qa-docker",
    providerBaseUrl,
    usePrebuiltImage: true,
  });

  // Unset ports and image name are expected to be forwarded as undefined.
  expect(writeQaDockerHarnessFiles).toHaveBeenCalledWith({
    outputDir: path.resolve("/tmp/openclaw-repo", ".artifacts/qa-docker"),
    repoRoot: path.resolve("/tmp/openclaw-repo"),
    gatewayPort: undefined,
    qaLabPort: undefined,
    providerBaseUrl,
    imageName: undefined,
    usePrebuiltImage: true,
  });
});
it("passes the explicit repo root into docker image builds", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const imageTag = "openclaw:qa-local-prebaked";

  await runQaDockerBuildImageCommand({ repoRoot, image: imageTag });

  // The CLI `image` option is expected to arrive at the builder as `imageName`.
  const expectedBuildArgs = {
    repoRoot: path.resolve(repoRoot),
    imageName: imageTag,
  };
  expect(buildQaDockerHarnessImage).toHaveBeenCalledWith(expectedBuildArgs);
});
it("resolves docker up paths relative to the explicit repo root", async () => {
  const repoRoot = "/tmp/openclaw-repo";
  const relativeOutputDir = ".artifacts/qa-up";

  await runQaDockerUpCommand({
    repoRoot,
    outputDir: relativeOutputDir,
    usePrebuiltImage: true,
    skipUiBuild: true,
  });

  // Options that were not supplied are expected to be forwarded as undefined.
  const expectedUpArgs = {
    repoRoot: path.resolve(repoRoot),
    outputDir: path.resolve(repoRoot, relativeOutputDir),
    gatewayPort: undefined,
    qaLabPort: undefined,
    providerBaseUrl: undefined,
    image: undefined,
    usePrebuiltImage: true,
    skipUiBuild: true,
  };
  expect(runQaDockerUp).toHaveBeenCalledWith(expectedUpArgs);
});
});
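/*
 * Footer left behind by the formatting tool that produced this listing (not part of the test suite):
 * Measurement V0.5 in percent: C=99 H=99 G=98
 * Processing time: 0.31 seconds
 * © Formatika GbR, Germany
 */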