From 685c7549e13801e1456e9615ab25b7869627edf7 Mon Sep 17 00:00:00 2001 From: dotta Date: Wed, 18 Mar 2026 21:47:53 -0500 Subject: [PATCH] Filter junk files from instructions bundles Co-Authored-By: Paperclip --- .../agent-instructions-service.test.ts | 41 +++++++++++++++++++ server/src/services/agent-instructions.ts | 28 ++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/server/src/__tests__/agent-instructions-service.test.ts b/server/src/__tests__/agent-instructions-service.test.ts index 8129b5ec..0e0d9d39 100644 --- a/server/src/__tests__/agent-instructions-service.test.ts +++ b/server/src/__tests__/agent-instructions-service.test.ts @@ -120,4 +120,45 @@ describe("agent instructions service", () => { expect(result.bundle.rootPath).toBe(externalRoot); await expect(fs.readFile(path.join(externalRoot, "docs", "AGENTS.md"), "utf8")).resolves.toBe("# Managed Agent\n"); }); + + it("filters junk files, dependency bundles, and python caches from bundle listings and exports", async () => { + const externalRoot = await makeTempDir("paperclip-agent-instructions-ignore-"); + cleanupDirs.add(externalRoot); + + await fs.writeFile(path.join(externalRoot, "AGENTS.md"), "# External Agent\n", "utf8"); + await fs.writeFile(path.join(externalRoot, ".gitignore"), "node_modules/\n", "utf8"); + await fs.writeFile(path.join(externalRoot, ".DS_Store"), "junk", "utf8"); + await fs.mkdir(path.join(externalRoot, "docs"), { recursive: true }); + await fs.writeFile(path.join(externalRoot, "docs", "TOOLS.md"), "## Tools\n", "utf8"); + await fs.writeFile(path.join(externalRoot, "docs", "module.pyc"), "compiled", "utf8"); + await fs.writeFile(path.join(externalRoot, "docs", "._TOOLS.md"), "appledouble", "utf8"); + await fs.mkdir(path.join(externalRoot, "node_modules", "pkg"), { recursive: true }); + await fs.writeFile(path.join(externalRoot, "node_modules", "pkg", "index.js"), "export {};\n", "utf8"); + await fs.mkdir(path.join(externalRoot, "python", "__pycache__"), { recursive: true }); + await fs.writeFile( + path.join(externalRoot, "python", "__pycache__", "module.cpython-313.pyc"), + "compiled", + "utf8", + ); + await fs.mkdir(path.join(externalRoot, ".pytest_cache"), { recursive: true }); + await fs.writeFile(path.join(externalRoot, ".pytest_cache", "README.md"), "cache", "utf8"); + + const svc = agentInstructionsService(); + const agent = makeAgent({ + instructionsBundleMode: "external", + instructionsRootPath: externalRoot, + instructionsEntryFile: "AGENTS.md", + instructionsFilePath: path.join(externalRoot, "AGENTS.md"), + }); + + const bundle = await svc.getBundle(agent); + const exported = await svc.exportFiles(agent); + + expect(bundle.files.map((file) => file.path)).toEqual([".gitignore", "AGENTS.md", "docs/TOOLS.md"]); + expect(Object.keys(exported.files).sort((left, right) => left.localeCompare(right))).toEqual([ + ".gitignore", + "AGENTS.md", + "docs/TOOLS.md", + ]); + }); }); diff --git a/server/src/services/agent-instructions.ts b/server/src/services/agent-instructions.ts index 086d5fbb..d3fc7008 100644 --- a/server/src/services/agent-instructions.ts +++ b/server/src/services/agent-instructions.ts @@ -11,6 +11,18 @@ const FILE_KEY = "instructionsFilePath"; const PROMPT_KEY = "promptTemplate"; const BOOTSTRAP_PROMPT_KEY = "bootstrapPromptTemplate"; const LEGACY_PROMPT_TEMPLATE_PATH = "promptTemplate.legacy.md"; +const IGNORED_INSTRUCTIONS_FILE_NAMES = new Set([".DS_Store", "Thumbs.db", "Desktop.ini"]); +const IGNORED_INSTRUCTIONS_DIRECTORY_NAMES = new Set([ + ".git", + ".nox", + ".pytest_cache", + ".ruff_cache", + ".tox", + ".venv", + "__pycache__", + "node_modules", + "venv", +]); type BundleMode = "managed" | "external"; @@ -143,13 +155,27 @@ async function statIfExists(targetPath: string) { return fs.stat(targetPath).catch(() => null); } +function shouldIgnoreInstructionsEntry(entry: { name: string; isDirectory(): boolean; isFile(): boolean }) { + if (entry.name === "." || entry.name === "..") return true; + if (entry.isDirectory()) { + return IGNORED_INSTRUCTIONS_DIRECTORY_NAMES.has(entry.name); + } + if (!entry.isFile()) return false; + return ( + IGNORED_INSTRUCTIONS_FILE_NAMES.has(entry.name) + || entry.name.startsWith("._") + || entry.name.endsWith(".pyc") + || entry.name.endsWith(".pyo") + ); +} + async function listFilesRecursive(rootPath: string): Promise { const output: string[] = []; async function walk(currentPath: string, relativeDir: string) { const entries = await fs.readdir(currentPath, { withFileTypes: true }).catch(() => []); for (const entry of entries) { - if (entry.name === "." || entry.name === "..") continue; + if (shouldIgnoreInstructionsEntry(entry)) continue; const absolutePath = path.join(currentPath, entry.name); const relativePath = normalizeRelativeFilePath( relativeDir ? path.posix.join(relativeDir, entry.name) : entry.name,