import fs from "node:fs/promises" ;
import path from "node:path" ;
import { hasErrnoCode } from "../infra/errors.js" ;
import { isPathInside } from "./scan-paths.js" ;
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
export type SkillScanSeverity = "info" | "warn" | "critical" ;
export type SkillScanFinding = {
ruleId: string;
severity: SkillScanSeverity;
file: string;
line: number;
message: string;
evidence: string;
};
export type SkillScanSummary = {
scannedFiles: number;
critical: number;
warn: number;
info: number;
findings: SkillScanFinding[];
};
export type SkillScanOptions = {
includeFiles?: string[];
maxFiles?: number;
maxFileBytes?: number;
};
// ---------------------------------------------------------------------------
// Scannable extensions
// ---------------------------------------------------------------------------
const SCANNABLE_EXTENSIONS = new Set([
".js" ,
".ts" ,
".mjs" ,
".cjs" ,
".mts" ,
".cts" ,
".jsx" ,
".tsx" ,
]);
const DEFAULT_MAX_SCAN_FILES = 500 ;
const DEFAULT_MAX_FILE_BYTES = 1024 * 1024 ;
const FILE_SCAN_CACHE_MAX = 5000 ;
const DIR_ENTRY_CACHE_MAX = 5000 ;
type FileScanCacheEntry = {
size: number;
mtimeMs: number;
maxFileBytes: number;
scanned: boolean ;
findings: SkillScanFinding[];
};
const FILE_SCAN_CACHE = new Map<string, FileScanCacheEntry>();
type CachedDirEntry = {
name: string;
kind: "file" | "dir" ;
};
type DirEntryCacheEntry = {
mtimeMs: number;
entries: CachedDirEntry[];
};
const DIR_ENTRY_CACHE = new Map<string, DirEntryCacheEntry>();
export function isScannable(filePath: string): boolean {
return SCANNABLE_EXTENSIONS.has(path.extname(filePath).toLowerCase());
}
function getCachedFileScanResult(params: {
filePath: string;
size: number;
mtimeMs: number;
maxFileBytes: number;
}): FileScanCacheEntry | undefined {
const cached = FILE_SCAN_CACHE.get(params.filePath);
if (!cached) {
return undefined;
}
if (
cached.size !== params.size ||
cached.mtimeMs !== params.mtimeMs ||
cached.maxFileBytes !== params.maxFileBytes
) {
FILE_SCAN_CACHE.delete (params.filePath);
return undefined;
}
return cached;
}
function setCachedFileScanResult(filePath: string, entry: FileScanCacheEntry): void {
if (FILE_SCAN_CACHE.size >= FILE_SCAN_CACHE_MAX) {
const oldest = FILE_SCAN_CACHE.keys().next();
if (!oldest.done) {
FILE_SCAN_CACHE.delete (oldest.value);
}
}
FILE_SCAN_CACHE.set(filePath, entry);
}
function setCachedDirEntries(dirPath: string, entry: DirEntryCacheEntry): void {
if (DIR_ENTRY_CACHE.size >= DIR_ENTRY_CACHE_MAX) {
const oldest = DIR_ENTRY_CACHE.keys().next();
if (!oldest.done) {
DIR_ENTRY_CACHE.delete (oldest.value);
}
}
DIR_ENTRY_CACHE.set(dirPath, entry);
}
export function clearSkillScanCacheForTest(): void {
FILE_SCAN_CACHE.clear();
DIR_ENTRY_CACHE.clear();
}
// ---------------------------------------------------------------------------
// Rule definitions
// ---------------------------------------------------------------------------
type LineRule = {
ruleId: string;
severity: SkillScanSeverity;
message: string;
pattern: RegExp;
/** If set, the rule only fires when the *full source* also matches this pattern. */
requiresContext?: RegExp;
};
type SourceRule = {
ruleId: string;
severity: SkillScanSeverity;
message: string;
/** Primary pattern tested against the full source. */
pattern: RegExp;
/** Secondary context pattern; both must match for the rule to fire. */
requiresContext?: RegExp;
};
const LINE_RULES: LineRule[] = [
{
ruleId: "dangerous-exec" ,
severity: "critical" ,
message: "Shell command execution detected (child_process)" ,
pattern: /\b(exec|execSync|spawn|spawnSync|execFile|execFileSync)\s*\(/,
requiresContext: /child_process/,
},
{
ruleId: "dynamic-code-execution" ,
severity: "critical" ,
message: "Dynamic code execution detected" ,
pattern: /\beval\s*\(|new \s+Function \s*\(/,
},
{
ruleId: "crypto-mining" ,
severity: "critical" ,
message: "Possible crypto-mining reference detected" ,
pattern: /stratum\+tcp|stratum\+ssl|coinhive|cryptonight|xmrig/i,
},
{
ruleId: "suspicious-network" ,
severity: "warn" ,
message: "WebSocket connection to non-standard port" ,
pattern: /new \s+WebSocket\s*\(\s*["']wss?:\/\/[^" ']*:(\d+)/,
},
];
const STANDARD_PORTS = new Set([80 , 443 , 8080 , 8443 , 3000 ]);
const NETWORK_SEND_CONTEXT_PATTERN = /\bfetch\s*\(|\bpost\s*\(|\.\s*post\s*\(|http\.request\s*\(/i;
const SOURCE_RULES: SourceRule[] = [
{
ruleId: "potential-exfiltration" ,
severity: "warn" ,
message: "File read combined with network send — possible data exfiltration" ,
pattern: /readFileSync|readFile/,
requiresContext: NETWORK_SEND_CONTEXT_PATTERN,
},
{
ruleId: "obfuscated-code" ,
severity: "warn" ,
message: "Hex-encoded string sequence detected (possible obfuscation)" ,
pattern: /(\\x[0 -9 a-fA-F]{2 }){6 ,}/,
},
{
ruleId: "obfuscated-code" ,
severity: "warn" ,
message: "Large base64 payload with decode call detected (possible obfuscation)" ,
pattern: /(?:atob|Buffer\.from)\s*\(\s*["'][A-Za-z0-9+/=]{200,}[" ']/,
},
{
ruleId: "env-harvesting" ,
severity: "critical" ,
message:
"Environment variable access combined with network send — possible credential harvesting" ,
pattern: /process\.env/,
requiresContext: NETWORK_SEND_CONTEXT_PATTERN,
},
];
// ---------------------------------------------------------------------------
// Core scanner
// ---------------------------------------------------------------------------
function truncateEvidence(evidence: string, maxLen = 120 ): string {
if (evidence.length <= maxLen) {
return evidence;
}
return `${evidence.slice(0 , maxLen)}…`;
}
export function scanSource(source: string, filePath: string): SkillScanFinding[] {
const findings: SkillScanFinding[] = [];
const lines = source.split("\n" );
const matchedLineRules = new Set<string>();
// --- Line rules ---
for (const rule of LINE_RULES) {
if (matchedLineRules.has(rule.ruleId)) {
continue ;
}
// Skip rule entirely if context requirement not met
if (rule.requiresContext && !rule.requiresContext.test(source)) {
continue ;
}
for (let i = 0 ; i < lines.length; i++) {
const line = lines[i];
const match = rule.pattern.exec(line);
if (!match) {
continue ;
}
// Special handling for suspicious-network: check port
if (rule.ruleId === "suspicious-network" ) {
const port = Number.parseInt(match[1 ], 10 );
if (STANDARD_PORTS.has(port)) {
continue ;
}
}
findings.push({
ruleId: rule.ruleId,
severity: rule.severity,
file: filePath,
line: i + 1 ,
message: rule.message,
evidence: truncateEvidence(line.trim()),
});
matchedLineRules.add(rule.ruleId);
break ; // one finding per line-rule per file
}
}
// --- Source rules ---
const matchedSourceRules = new Set<string>();
for (const rule of SOURCE_RULES) {
// Allow multiple findings for different messages with the same ruleId
// but deduplicate exact (ruleId+message) combos
const ruleKey = `${rule.ruleId}::${rule.message}`;
if (matchedSourceRules.has(ruleKey)) {
continue ;
}
if (!rule.pattern.test(source)) {
continue ;
}
if (rule.requiresContext && !rule.requiresContext.test(source)) {
continue ;
}
// Find the first matching line for evidence + line number
let matchLine = 0 ;
let matchEvidence = "" ;
for (let i = 0 ; i < lines.length; i++) {
if (rule.pattern.test(lines[i])) {
matchLine = i + 1 ;
matchEvidence = lines[i].trim();
break ;
}
}
// For source rules, if we can't find a line match the pattern might span
// lines. Report line 0 with truncated source as evidence.
if (matchLine === 0 ) {
matchLine = 1 ;
matchEvidence = source.slice(0 , 120 );
}
findings.push({
ruleId: rule.ruleId,
severity: rule.severity,
file: filePath,
line: matchLine,
message: rule.message,
evidence: truncateEvidence(matchEvidence),
});
matchedSourceRules.add(ruleKey);
}
return findings;
}
// ---------------------------------------------------------------------------
// Directory scanner
// ---------------------------------------------------------------------------
function normalizeScanOptions(opts?: SkillScanOptions): Required<SkillScanOptions> {
return {
includeFiles: opts?.includeFiles ?? [],
maxFiles: Math.max(1 , opts?.maxFiles ?? DEFAULT_MAX_SCAN_FILES),
maxFileBytes: Math.max(1 , opts?.maxFileBytes ?? DEFAULT_MAX_FILE_BYTES),
};
}
async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<string[]> {
const files: string[] = [];
const stack: string[] = [dirPath];
while (stack.length > 0 && files.length < maxFiles) {
const currentDir = stack.pop();
if (!currentDir) {
break ;
}
const entries = await readDirEntriesWithCache(currentDir);
for (const entry of entries) {
if (files.length >= maxFiles) {
break ;
}
// Skip hidden dirs and node_modules
if (entry.name.startsWith("." ) || entry.name === "node_modules" ) {
continue ;
}
const fullPath = path.join(currentDir, entry.name);
if (entry.kind === "dir" ) {
stack.push(fullPath);
} else if (entry.kind === "file" && isScannable(entry.name)) {
files.push(fullPath);
}
}
}
return files;
}
async function readDirEntriesWithCache(dirPath: string): Promise<CachedDirEntry[]> {
let st: Awaited<ReturnType<typeof fs.stat>> | null = null ;
try {
st = await fs.stat(dirPath);
} catch (err) {
if (hasErrnoCode(err, "ENOENT" )) {
return [];
}
throw err;
}
if (!st?.isDirectory()) {
return [];
}
const cached = DIR_ENTRY_CACHE.get(dirPath);
if (cached && cached.mtimeMs === st.mtimeMs) {
return cached.entries;
}
const dirents = await fs.readdir(dirPath, { withFileTypes: true });
const entries: CachedDirEntry[] = [];
for (const entry of dirents) {
if (entry.isDirectory()) {
entries.push({ name: entry.name, kind: "dir" });
} else if (entry.isFile()) {
entries.push({ name: entry.name, kind: "file" });
}
}
setCachedDirEntries(dirPath, {
mtimeMs: st.mtimeMs,
entries,
});
return entries;
}
async function resolveForcedFiles(params: {
rootDir: string;
includeFiles: string[];
}): Promise<string[]> {
if (params.includeFiles.length === 0 ) {
return [];
}
const seen = new Set<string>();
const out: string[] = [];
for (const rawIncludePath of params.includeFiles) {
const includePath = path.resolve(params.rootDir, rawIncludePath);
if (!isPathInside(params.rootDir, includePath)) {
continue ;
}
if (!isScannable(includePath)) {
continue ;
}
if (seen.has(includePath)) {
continue ;
}
let st: Awaited<ReturnType<typeof fs.stat>> | null = null ;
try {
st = await fs.stat(includePath);
} catch (err) {
if (hasErrnoCode(err, "ENOENT" )) {
continue ;
}
throw err;
}
if (!st?.isFile()) {
continue ;
}
out.push(includePath);
seen.add(includePath);
}
return out;
}
async function collectScannableFiles(dirPath: string, opts: Required<SkillScanOptions>) {
const forcedFiles = await resolveForcedFiles({
rootDir: dirPath,
includeFiles: opts.includeFiles,
});
if (forcedFiles.length >= opts.maxFiles) {
return forcedFiles.slice(0 , opts.maxFiles);
}
const walkedFiles = await walkDirWithLimit(dirPath, opts.maxFiles);
const seen = new Set(forcedFiles.map((f) => path.resolve(f)));
const out = [...forcedFiles];
for (const walkedFile of walkedFiles) {
if (out.length >= opts.maxFiles) {
break ;
}
const resolved = path.resolve(walkedFile);
if (seen.has(resolved)) {
continue ;
}
out.push(walkedFile);
seen.add(resolved);
}
return out;
}
async function scanFileWithCache(params: {
filePath: string;
maxFileBytes: number;
}): Promise<{ scanned: boolean ; findings: SkillScanFinding[] }> {
const { filePath, maxFileBytes } = params;
let st: Awaited<ReturnType<typeof fs.stat>> | null = null ;
try {
st = await fs.stat(filePath);
} catch (err) {
if (hasErrnoCode(err, "ENOENT" )) {
return { scanned: false , findings: [] };
}
throw err;
}
if (!st?.isFile()) {
return { scanned: false , findings: [] };
}
const cached = getCachedFileScanResult({
filePath,
size: st.size,
mtimeMs: st.mtimeMs,
maxFileBytes,
});
if (cached) {
return {
scanned: cached.scanned,
findings: cached.findings,
};
}
if (st.size > maxFileBytes) {
const skippedEntry: FileScanCacheEntry = {
size: st.size,
mtimeMs: st.mtimeMs,
maxFileBytes,
scanned: false ,
findings: [],
};
setCachedFileScanResult(filePath, skippedEntry);
return { scanned: false , findings: [] };
}
let source: string;
try {
source = await fs.readFile(filePath, "utf-8" );
} catch (err) {
if (hasErrnoCode(err, "ENOENT" )) {
return { scanned: false , findings: [] };
}
throw err;
}
const findings = scanSource(source, filePath);
setCachedFileScanResult(filePath, {
size: st.size,
mtimeMs: st.mtimeMs,
maxFileBytes,
scanned: true ,
findings,
});
return { scanned: true , findings };
}
export async function scanDirectory(
dirPath: string,
opts?: SkillScanOptions,
): Promise<SkillScanFinding[]> {
const scanOptions = normalizeScanOptions(opts);
const files = await collectScannableFiles(dirPath, scanOptions);
const allFindings: SkillScanFinding[] = [];
for (const file of files) {
const scanResult = await scanFileWithCache({
filePath: file,
maxFileBytes: scanOptions.maxFileBytes,
});
if (!scanResult.scanned) {
continue ;
}
allFindings.push(...scanResult.findings);
}
return allFindings;
}
export async function scanDirectoryWithSummary(
dirPath: string,
opts?: SkillScanOptions,
): Promise<SkillScanSummary> {
const scanOptions = normalizeScanOptions(opts);
const files = await collectScannableFiles(dirPath, scanOptions);
const allFindings: SkillScanFinding[] = [];
let scannedFiles = 0 ;
let critical = 0 ;
let warn = 0 ;
let info = 0 ;
for (const file of files) {
const scanResult = await scanFileWithCache({
filePath: file,
maxFileBytes: scanOptions.maxFileBytes,
});
if (!scanResult.scanned) {
continue ;
}
scannedFiles += 1 ;
for (const finding of scanResult.findings) {
allFindings.push(finding);
if (finding.severity === "critical" ) {
critical += 1 ;
} else if (finding.severity === "warn" ) {
warn += 1 ;
} else {
info += 1 ;
}
}
}
return {
scannedFiles,
critical,
warn,
info,
findings: allFindings,
};
}
Messung V0.5 in Prozent C=96 H=86 G=90
¤ Dauer der Verarbeitung: 0.9 Sekunden
(vorverarbeitet am 2026-06-08)
¤
*© Formatika GbR, Deutschland