Filter junk files from instructions bundles

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
dotta
2026-03-18 21:47:53 -05:00
parent 8be868f0ab
commit 685c7549e1
2 changed files with 68 additions and 1 deletions

View File

@@ -120,4 +120,45 @@ describe("agent instructions service", () => {
expect(result.bundle.rootPath).toBe(externalRoot);
await expect(fs.readFile(path.join(externalRoot, "docs", "AGENTS.md"), "utf8")).resolves.toBe("# Managed Agent\n");
});
it("filters junk files, dependency bundles, and python caches from bundle listings and exports", async () => {
  const externalRoot = await makeTempDir("paperclip-agent-instructions-ignore-");
  cleanupDirs.add(externalRoot);

  // Fixture tree as [path segments, contents]. It mixes files that must survive
  // (AGENTS.md, .gitignore, docs/TOOLS.md) with junk that must be filtered out.
  const fixtureFiles: Array<[string[], string]> = [
    [["AGENTS.md"], "# External Agent\n"],
    [[".gitignore"], "node_modules/\n"],
    [[".DS_Store"], "junk"],
    [["docs", "TOOLS.md"], "## Tools\n"],
    [["docs", "module.pyc"], "compiled"],
    [["docs", "._TOOLS.md"], "appledouble"],
    [["node_modules", "pkg", "index.js"], "export {};\n"],
    [["python", "__pycache__", "module.cpython-313.pyc"], "compiled"],
    [[".pytest_cache", "README.md"], "cache"],
  ];
  for (const [segments, contents] of fixtureFiles) {
    const targetPath = path.join(externalRoot, ...segments);
    // Recursive mkdir is a no-op when the parent directory already exists.
    await fs.mkdir(path.dirname(targetPath), { recursive: true });
    await fs.writeFile(targetPath, contents, "utf8");
  }

  const svc = agentInstructionsService();
  const agent = makeAgent({
    instructionsBundleMode: "external",
    instructionsRootPath: externalRoot,
    instructionsEntryFile: "AGENTS.md",
    instructionsFilePath: path.join(externalRoot, "AGENTS.md"),
  });

  const bundle = await svc.getBundle(agent);
  const exported = await svc.exportFiles(agent);

  // Only the three legitimate files may appear in either view of the bundle.
  const listedPaths = bundle.files.map((file) => file.path);
  expect(listedPaths).toEqual([".gitignore", "AGENTS.md", "docs/TOOLS.md"]);

  const exportedPaths = Object.keys(exported.files).sort((left, right) => left.localeCompare(right));
  expect(exportedPaths).toEqual([
    ".gitignore",
    "AGENTS.md",
    "docs/TOOLS.md",
  ]);
});
});

View File

@@ -11,6 +11,18 @@ const FILE_KEY = "instructionsFilePath";
const PROMPT_KEY = "promptTemplate";
const BOOTSTRAP_PROMPT_KEY = "bootstrapPromptTemplate";
const LEGACY_PROMPT_TEMPLATE_PATH = "promptTemplate.legacy.md";

/**
 * Names of OS-generated junk files that `shouldIgnoreInstructionsEntry`
 * consults for regular files: macOS Finder metadata and Windows Explorer
 * thumbnail/folder-settings files.
 */
const IGNORED_INSTRUCTIONS_FILE_NAMES = new Set<string>([
  ".DS_Store",
  "Thumbs.db",
  "Desktop.ini",
]);

/**
 * Names of directories that `shouldIgnoreInstructionsEntry` consults for
 * directory entries: version-control metadata, Python tool caches and virtual
 * environments, and Node dependency folders.
 */
const IGNORED_INSTRUCTIONS_DIRECTORY_NAMES = new Set<string>([
  // Version control
  ".git",
  // Python tooling caches and environments
  ".nox",
  ".pytest_cache",
  ".ruff_cache",
  ".tox",
  ".venv",
  "__pycache__",
  // Node dependencies
  "node_modules",
  // Python virtual environment (undotted variant)
  "venv",
]);

type BundleMode = "managed" | "external";
@@ -143,13 +155,27 @@ async function statIfExists(targetPath: string) {
return fs.stat(targetPath).catch(() => null);
}
/**
 * Decides whether a directory entry should be left out of an instructions bundle.
 *
 * Rules, in order:
 * - `.` and `..` are always ignored.
 * - A directory is ignored when its exact name is in
 *   `IGNORED_INSTRUCTIONS_DIRECTORY_NAMES`.
 * - Anything that is neither a directory nor a regular file is never ignored here.
 * - A regular file is ignored when it is an AppleDouble sidecar (`._*`), a
 *   compiled Python file (`.pyc` / `.pyo`), or its name matches an entry of
 *   `IGNORED_INSTRUCTIONS_FILE_NAMES` regardless of letter case.
 *
 * @param entry A `Dirent`-like object exposing `name`, `isDirectory()` and `isFile()`.
 * @returns `true` when the entry must be skipped, `false` when it should be kept.
 */
function shouldIgnoreInstructionsEntry(entry: { name: string; isDirectory(): boolean; isFile(): boolean }): boolean {
  const { name } = entry;
  if (name === "." || name === "..") return true;
  if (entry.isDirectory()) {
    return IGNORED_INSTRUCTIONS_DIRECTORY_NAMES.has(name);
  }
  if (!entry.isFile()) return false;
  if (name.startsWith("._") || name.endsWith(".pyc") || name.endsWith(".pyo")) return true;

  // OS junk files live on case-insensitive volumes and show up with varying
  // casing (Windows commonly writes `desktop.ini`), so compare without case.
  const loweredName = name.toLowerCase();
  for (const junkName of IGNORED_INSTRUCTIONS_FILE_NAMES) {
    if (junkName.toLowerCase() === loweredName) return true;
  }
  return false;
}
async function listFilesRecursive(rootPath: string): Promise<string[]> {
const output: string[] = [];
async function walk(currentPath: string, relativeDir: string) {
const entries = await fs.readdir(currentPath, { withFileTypes: true }).catch(() => []);
for (const entry of entries) {
if (entry.name === "." || entry.name === "..") continue;
if (shouldIgnoreInstructionsEntry(entry)) continue;
const absolutePath = path.join(currentPath, entry.name);
const relativePath = normalizeRelativeFilePath(
relativeDir ? path.posix.join(relativeDir, entry.name) : entry.name,