Add minimal worktree seed mode

This commit is contained in:
Dotta
2026-03-10 07:41:01 -05:00
parent 0704854926
commit 4a67db6a4d
5 changed files with 269 additions and 41 deletions

View File

@@ -4,6 +4,7 @@ import {
buildWorktreeConfig,
buildWorktreeEnvEntries,
formatShellExports,
resolveWorktreeSeedPlan,
resolveWorktreeLocalPaths,
rewriteLocalUrlPort,
sanitizeWorktreeInstanceId,
@@ -107,4 +108,18 @@ describe("worktree helpers", () => {
expect(env.PAPERCLIP_INSTANCE_ID).toBe("feature-worktree-support");
expect(formatShellExports(env)).toContain("export PAPERCLIP_INSTANCE_ID='feature-worktree-support'");
});
it("uses minimal seed mode to keep app state but drop heavy runtime history", () => {
const minimal = resolveWorktreeSeedPlan("minimal");
const full = resolveWorktreeSeedPlan("full");
expect(minimal.excludedTables).toContain("heartbeat_runs");
expect(minimal.excludedTables).toContain("heartbeat_run_events");
expect(minimal.excludedTables).toContain("workspace_runtime_services");
expect(minimal.excludedTables).toContain("agent_task_sessions");
expect(minimal.nullifyColumns.issues).toEqual(["checkout_run_id", "execution_run_id"]);
expect(full.excludedTables).toEqual([]);
expect(full.nullifyColumns).toEqual({});
});
});

View File

@@ -3,6 +3,30 @@ import type { PaperclipConfig } from "../config/schema.js";
import { expandHomePrefix } from "../config/home.js";
export const DEFAULT_WORKTREE_HOME = "~/.paperclip-worktrees";
export const WORKTREE_SEED_MODES = ["minimal", "full"] as const;
export type WorktreeSeedMode = (typeof WORKTREE_SEED_MODES)[number];
export type WorktreeSeedPlan = {
mode: WorktreeSeedMode;
excludedTables: string[];
nullifyColumns: Record<string, string[]>;
};
const MINIMAL_WORKTREE_EXCLUDED_TABLES = [
"activity_log",
"agent_runtime_state",
"agent_task_sessions",
"agent_wakeup_requests",
"cost_events",
"heartbeat_run_events",
"heartbeat_runs",
"workspace_runtime_services",
];
const MINIMAL_WORKTREE_NULLIFIED_COLUMNS: Record<string, string[]> = {
issues: ["checkout_run_id", "execution_run_id"],
};
export type WorktreeLocalPaths = {
cwd: string;
@@ -20,6 +44,27 @@ export type WorktreeLocalPaths = {
storageDir: string;
};
export function isWorktreeSeedMode(value: string): value is WorktreeSeedMode {
return (WORKTREE_SEED_MODES as readonly string[]).includes(value);
}
export function resolveWorktreeSeedPlan(mode: WorktreeSeedMode): WorktreeSeedPlan {
if (mode === "full") {
return {
mode,
excludedTables: [],
nullifyColumns: {},
};
}
return {
mode,
excludedTables: [...MINIMAL_WORKTREE_EXCLUDED_TABLES],
nullifyColumns: {
...MINIMAL_WORKTREE_NULLIFIED_COLUMNS,
},
};
}
function nonEmpty(value: string | null | undefined): string | null {
return typeof value === "string" && value.trim().length > 0 ? value.trim() : null;
}

View File

@@ -22,9 +22,12 @@ import {
buildWorktreeEnvEntries,
DEFAULT_WORKTREE_HOME,
formatShellExports,
isWorktreeSeedMode,
resolveSuggestedWorktreeName,
resolveWorktreeSeedPlan,
resolveWorktreeLocalPaths,
sanitizeWorktreeInstanceId,
type WorktreeSeedMode,
type WorktreeLocalPaths,
} from "./worktree-lib.js";
@@ -38,6 +41,7 @@ type WorktreeInitOptions = {
serverPort?: number;
dbPort?: number;
seed?: boolean;
seedMode?: string;
force?: boolean;
};
@@ -178,6 +182,8 @@ async function ensureEmbeddedPostgres(dataDir: string, preferredPort: number): P
password: "paperclip",
port,
persistent: true,
onLog: () => {},
onError: () => {},
});
if (!existsSync(path.resolve(dataDir, "PG_VERSION"))) {
@@ -203,7 +209,9 @@ async function seedWorktreeDatabase(input: {
targetConfig: PaperclipConfig;
targetPaths: WorktreeLocalPaths;
instanceId: string;
seedMode: WorktreeSeedMode;
}): Promise<string> {
const seedPlan = resolveWorktreeSeedPlan(input.seedMode);
const sourceEnvFile = resolvePaperclipEnvFile(input.sourceConfigPath);
const sourceEnvEntries = readPaperclipEnvEntries(sourceEnvFile);
let sourceHandle: EmbeddedPostgresHandle | null = null;
@@ -227,6 +235,8 @@ async function seedWorktreeDatabase(input: {
retentionDays: 7,
filenamePrefix: `${input.instanceId}-seed`,
includeMigrationJournal: true,
excludeTables: seedPlan.excludedTables,
nullifyColumns: seedPlan.nullifyColumns,
});
targetHandle = await ensureEmbeddedPostgres(
@@ -262,6 +272,10 @@ export async function worktreeInitCommand(opts: WorktreeInitOptions): Promise<vo
cwd,
opts.name ?? detectGitBranchName(cwd) ?? undefined,
);
const seedMode = opts.seedMode ?? "minimal";
if (!isWorktreeSeedMode(seedMode)) {
throw new Error(`Unsupported seed mode "${seedMode}". Expected one of: minimal, full.`);
}
const instanceId = sanitizeWorktreeInstanceId(opts.instance ?? name);
const paths = resolveWorktreeLocalPaths({
cwd,
@@ -306,7 +320,7 @@ export async function worktreeInitCommand(opts: WorktreeInitOptions): Promise<vo
);
}
const spinner = p.spinner();
spinner.start("Seeding isolated worktree database from source instance...");
spinner.start(`Seeding isolated worktree database from source instance (${seedMode})...`);
try {
seedSummary = await seedWorktreeDatabase({
sourceConfigPath,
@@ -314,8 +328,9 @@ export async function worktreeInitCommand(opts: WorktreeInitOptions): Promise<vo
targetConfig,
targetPaths: paths,
instanceId,
seedMode,
});
spinner.stop("Seeded isolated worktree database.");
spinner.stop(`Seeded isolated worktree database (${seedMode}).`);
} catch (error) {
spinner.stop(pc.red("Failed to seed worktree database."));
throw error;
@@ -328,6 +343,7 @@ export async function worktreeInitCommand(opts: WorktreeInitOptions): Promise<vo
p.log.message(pc.dim(`Instance: ${paths.instanceId}`));
p.log.message(pc.dim(`Server port: ${serverPort} | DB port: ${databasePort}`));
if (seedSummary) {
p.log.message(pc.dim(`Seed mode: ${seedMode}`));
p.log.message(pc.dim(`Seed snapshot: ${seedSummary}`));
}
p.outro(
@@ -371,6 +387,7 @@ export function registerWorktreeCommands(program: Command): void {
.option("--from-instance <id>", "Source instance id when deriving the source config", "default")
.option("--server-port <port>", "Preferred server port", (value) => Number(value))
.option("--db-port <port>", "Preferred embedded Postgres port", (value) => Number(value))
.option("--seed-mode <mode>", "Seed profile: minimal or full (default: minimal)", "minimal")
.option("--no-seed", "Skip database seeding from the source instance")
.option("--force", "Replace existing repo-local config and isolated instance data", false)
.action(worktreeInitCommand);

View File

@@ -139,7 +139,13 @@ This command:
- writes repo-local files at `.paperclip/config.json` and `.paperclip/.env`
- creates an isolated instance under `~/.paperclip-worktrees/instances/<worktree-id>/`
- picks a free app port and embedded PostgreSQL port
- by default seeds the isolated DB from your main instance via a logical SQL snapshot
- by default seeds the isolated DB in `minimal` mode from your main instance via a logical SQL snapshot
Seed modes:
- `minimal` keeps core app state like companies, projects, issues, comments, approvals, and auth state, but drops heavy operational history such as heartbeat runs, wake requests, activity logs, runtime services, and agent session state
- `full` makes a full logical clone of the source instance
- `--no-seed` creates an empty isolated instance
After `worktree init`, both the server and the CLI auto-load the repo-local `.paperclip/.env` when run inside that worktree, so normal commands like `pnpm dev`, `paperclipai doctor`, and `paperclipai db:backup` stay scoped to the worktree instance.
@@ -155,6 +161,8 @@ Useful options:
```sh
paperclipai worktree init --no-seed
paperclipai worktree init --seed-mode minimal
paperclipai worktree init --seed-mode full
paperclipai worktree init --from-instance default
paperclipai worktree init --from-data-dir ~/.paperclip
paperclipai worktree init --force

View File

@@ -1,6 +1,6 @@
import { existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "node:fs";
import { writeFile } from "node:fs/promises";
import { resolve } from "node:path";
import { readFile, writeFile } from "node:fs/promises";
import { basename, resolve } from "node:path";
import postgres from "postgres";
export type RunDatabaseBackupOptions = {
@@ -10,6 +10,8 @@ export type RunDatabaseBackupOptions = {
filenamePrefix?: string;
connectTimeoutSeconds?: number;
includeMigrationJournal?: boolean;
excludeTables?: string[];
nullifyColumns?: Record<string, string[]>;
};
export type RunDatabaseBackupResult = {
@@ -24,6 +26,34 @@ export type RunDatabaseRestoreOptions = {
connectTimeoutSeconds?: number;
};
type SequenceDefinition = {
sequence_name: string;
data_type: string;
start_value: string;
minimum_value: string;
maximum_value: string;
increment: string;
cycle_option: "YES" | "NO";
owner_table: string | null;
owner_column: string | null;
};
const STATEMENT_BREAKPOINT = "-- paperclip statement breakpoint 69f6f3f1-42fd-46a6-bf17-d1d85f8f3900";
function sanitizeRestoreErrorMessage(error: unknown): string {
if (error && typeof error === "object") {
const record = error as Record<string, unknown>;
const firstLine = typeof record.message === "string"
? record.message.split(/\r?\n/, 1)[0]?.trim()
: "";
const detail = typeof record.detail === "string" ? record.detail.trim() : "";
const severity = typeof record.severity === "string" ? record.severity.trim() : "";
const message = firstLine || detail || (error instanceof Error ? error.message : String(error));
return severity ? `${severity}: ${message}` : message;
}
return error instanceof Error ? error.message : String(error);
}
function timestamp(date: Date = new Date()): string {
const pad = (n: number) => String(n).padStart(2, "0");
return `${date.getFullYear()}${pad(date.getMonth() + 1)}${pad(date.getDate())}-${pad(date.getHours())}${pad(date.getMinutes())}${pad(date.getSeconds())}`;
@@ -54,11 +84,48 @@ function formatBackupSize(sizeBytes: number): string {
return `${(sizeBytes / (1024 * 1024)).toFixed(1)}M`;
}
function formatSqlLiteral(value: string): string {
const sanitized = value.replace(/\u0000/g, "");
let tag = "$paperclip$";
while (sanitized.includes(tag)) {
tag = `$paperclip_${Math.random().toString(36).slice(2, 8)}$`;
}
return `${tag}${sanitized}${tag}`;
}
function normalizeTableNameSet(values: string[] | undefined): Set<string> {
return new Set(
(values ?? [])
.map((value) => value.trim())
.filter((value) => value.length > 0),
);
}
function normalizeNullifyColumnMap(values: Record<string, string[]> | undefined): Map<string, Set<string>> {
const out = new Map<string, Set<string>>();
if (!values) return out;
for (const [tableName, columns] of Object.entries(values)) {
const normalizedTable = tableName.trim();
if (normalizedTable.length === 0) continue;
const normalizedColumns = new Set(
columns
.map((column) => column.trim())
.filter((column) => column.length > 0),
);
if (normalizedColumns.size > 0) {
out.set(normalizedTable, normalizedColumns);
}
}
return out;
}
export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise<RunDatabaseBackupResult> {
const filenamePrefix = opts.filenamePrefix ?? "paperclip";
const retentionDays = Math.max(1, Math.trunc(opts.retentionDays));
const connectTimeout = Math.max(1, Math.trunc(opts.connectTimeoutSeconds ?? 5));
const includeMigrationJournal = opts.includeMigrationJournal === true;
const excludedTableNames = normalizeTableNameSet(opts.excludeTables);
const nullifiedColumnsByTable = normalizeNullifyColumnMap(opts.nullifyColumns);
const sql = postgres(opts.connectionString, { max: 1, connect_timeout: connectTimeout });
try {
@@ -66,13 +133,36 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
const lines: string[] = [];
const emit = (line: string) => lines.push(line);
const emitStatement = (statement: string) => {
emit(statement);
emit(STATEMENT_BREAKPOINT);
};
const emitStatementBoundary = () => {
emit(STATEMENT_BREAKPOINT);
};
emit("-- Paperclip database backup");
emit(`-- Created: ${new Date().toISOString()}`);
emit("");
emit("BEGIN;");
emitStatement("BEGIN;");
emitStatement("SET LOCAL session_replication_role = replica;");
emitStatement("SET LOCAL client_min_messages = warning;");
emit("");
const allTables = await sql<{ tablename: string }[]>`
SELECT c.relname AS tablename
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'public'
AND c.relkind = 'r'
ORDER BY c.relname
`;
const tables = allTables.filter(({ tablename }) => {
if (!includeMigrationJournal && tablename === "__drizzle_migrations") return false;
return !excludedTableNames.has(tablename);
});
const includedTableNames = new Set(tables.map(({ tablename }) => tablename));
// Get all enums
const enums = await sql<{ typname: string; labels: string[] }[]>`
SELECT t.typname, array_agg(e.enumlabel ORDER BY e.enumsortorder) AS labels
@@ -86,20 +176,42 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
for (const e of enums) {
const labels = e.labels.map((l) => `'${l.replace(/'/g, "''")}'`).join(", ");
emit(`CREATE TYPE "public"."${e.typname}" AS ENUM (${labels});`);
emitStatement(`CREATE TYPE "public"."${e.typname}" AS ENUM (${labels});`);
}
if (enums.length > 0) emit("");
// Get tables in dependency order (referenced tables first)
const tables = await sql<{ tablename: string }[]>`
SELECT c.relname AS tablename
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'public'
AND c.relkind = 'r'
AND (${includeMigrationJournal}::boolean OR c.relname != '__drizzle_migrations')
ORDER BY c.relname
const allSequences = await sql<SequenceDefinition[]>`
SELECT
s.sequence_name,
s.data_type,
s.start_value,
s.minimum_value,
s.maximum_value,
s.increment,
s.cycle_option,
tbl.relname AS owner_table,
attr.attname AS owner_column
FROM information_schema.sequences s
JOIN pg_class seq ON seq.relname = s.sequence_name
JOIN pg_namespace n ON n.oid = seq.relnamespace AND n.nspname = s.sequence_schema
LEFT JOIN pg_depend dep ON dep.objid = seq.oid AND dep.deptype = 'a'
LEFT JOIN pg_class tbl ON tbl.oid = dep.refobjid
LEFT JOIN pg_attribute attr ON attr.attrelid = tbl.oid AND attr.attnum = dep.refobjsubid
WHERE s.sequence_schema = 'public'
ORDER BY s.sequence_name
`;
const sequences = allSequences.filter((seq) => !seq.owner_table || includedTableNames.has(seq.owner_table));
if (sequences.length > 0) {
emit("-- Sequences");
for (const seq of sequences) {
emitStatement(`DROP SEQUENCE IF EXISTS "${seq.sequence_name}" CASCADE;`);
emitStatement(
`CREATE SEQUENCE "${seq.sequence_name}" AS ${seq.data_type} INCREMENT BY ${seq.increment} MINVALUE ${seq.minimum_value} MAXVALUE ${seq.maximum_value} START WITH ${seq.start_value}${seq.cycle_option === "YES" ? " CYCLE" : " NO CYCLE"};`,
);
}
emit("");
}
// Get full CREATE TABLE DDL via column info
for (const { tablename } of tables) {
@@ -121,7 +233,7 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
`;
emit(`-- Table: ${tablename}`);
emit(`DROP TABLE IF EXISTS "${tablename}" CASCADE;`);
emitStatement(`DROP TABLE IF EXISTS "${tablename}" CASCADE;`);
const colDefs: string[] = [];
for (const col of columns) {
@@ -168,11 +280,23 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
emit(`CREATE TABLE "${tablename}" (`);
emit(colDefs.join(",\n"));
emit(");");
emitStatementBoundary();
emit("");
}
const ownedSequences = sequences.filter((seq) => seq.owner_table && seq.owner_column);
if (ownedSequences.length > 0) {
emit("-- Sequence ownership");
for (const seq of ownedSequences) {
emitStatement(
`ALTER SEQUENCE "${seq.sequence_name}" OWNED BY "${seq.owner_table!}"."${seq.owner_column!}";`,
);
}
emit("");
}
// Foreign keys (after all tables created)
const fks = await sql<{
const allForeignKeys = await sql<{
constraint_name: string;
source_table: string;
source_columns: string[];
@@ -199,13 +323,16 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
GROUP BY c.conname, src.relname, tgt.relname, c.confupdtype, c.confdeltype
ORDER BY src.relname, c.conname
`;
const fks = allForeignKeys.filter(
(fk) => includedTableNames.has(fk.source_table) && includedTableNames.has(fk.target_table),
);
if (fks.length > 0) {
emit("-- Foreign keys");
for (const fk of fks) {
const srcCols = fk.source_columns.map((c) => `"${c}"`).join(", ");
const tgtCols = fk.target_columns.map((c) => `"${c}"`).join(", ");
emit(
emitStatement(
`ALTER TABLE "${fk.source_table}" ADD CONSTRAINT "${fk.constraint_name}" FOREIGN KEY (${srcCols}) REFERENCES "${fk.target_table}" (${tgtCols}) ON UPDATE ${fk.update_rule} ON DELETE ${fk.delete_rule};`,
);
}
@@ -213,7 +340,7 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
}
// Unique constraints
const uniques = await sql<{
const allUniqueConstraints = await sql<{
constraint_name: string;
tablename: string;
column_names: string[];
@@ -229,19 +356,20 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
GROUP BY c.conname, t.relname
ORDER BY t.relname, c.conname
`;
const uniques = allUniqueConstraints.filter((entry) => includedTableNames.has(entry.tablename));
if (uniques.length > 0) {
emit("-- Unique constraints");
for (const u of uniques) {
const cols = u.column_names.map((c) => `"${c}"`).join(", ");
emit(`ALTER TABLE "${u.tablename}" ADD CONSTRAINT "${u.constraint_name}" UNIQUE (${cols});`);
emitStatement(`ALTER TABLE "${u.tablename}" ADD CONSTRAINT "${u.constraint_name}" UNIQUE (${cols});`);
}
emit("");
}
// Indexes (non-primary, non-unique-constraint)
const indexes = await sql<{ indexdef: string }[]>`
SELECT indexdef
const allIndexes = await sql<{ tablename: string; indexdef: string }[]>`
SELECT tablename, indexdef
FROM pg_indexes
WHERE schemaname = 'public'
AND indexname NOT IN (
@@ -250,11 +378,12 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
)
ORDER BY tablename, indexname
`;
const indexes = allIndexes.filter((entry) => includedTableNames.has(entry.tablename));
if (indexes.length > 0) {
emit("-- Indexes");
for (const idx of indexes) {
emit(`${idx.indexdef};`);
emitStatement(`${idx.indexdef};`);
}
emit("");
}
@@ -278,42 +407,38 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
emit(`-- Data for: ${tablename} (${count[0]!.n} rows)`);
const rows = await sql`SELECT * FROM ${sql(tablename)}`.values();
const nullifiedColumns = nullifiedColumnsByTable.get(tablename) ?? new Set<string>();
for (const row of rows) {
const values = row.map((val: unknown) => {
const values = row.map((rawValue: unknown, index) => {
const columnName = cols[index]?.column_name;
const val = columnName && nullifiedColumns.has(columnName) ? null : rawValue;
if (val === null || val === undefined) return "NULL";
if (typeof val === "boolean") return val ? "true" : "false";
if (typeof val === "number") return String(val);
if (val instanceof Date) return `'${val.toISOString()}'`;
if (typeof val === "object") return `'${JSON.stringify(val).replace(/'/g, "''")}'`;
return `'${String(val).replace(/'/g, "''")}'`;
if (val instanceof Date) return formatSqlLiteral(val.toISOString());
if (typeof val === "object") return formatSqlLiteral(JSON.stringify(val));
return formatSqlLiteral(String(val));
});
emit(`INSERT INTO "${tablename}" (${colNames}) VALUES (${values.join(", ")});`);
emitStatement(`INSERT INTO "${tablename}" (${colNames}) VALUES (${values.join(", ")});`);
}
emit("");
}
// Sequence values
const sequences = await sql<{ sequence_name: string }[]>`
SELECT sequence_name
FROM information_schema.sequences
WHERE sequence_schema = 'public'
ORDER BY sequence_name
`;
if (sequences.length > 0) {
emit("-- Sequence values");
for (const seq of sequences) {
const val = await sql<{ last_value: string }[]>`
SELECT last_value::text FROM ${sql(seq.sequence_name)}
const val = await sql<{ last_value: string; is_called: boolean }[]>`
SELECT last_value::text, is_called FROM ${sql(seq.sequence_name)}
`;
if (val[0]) {
emit(`SELECT setval('"${seq.sequence_name}"', ${val[0].last_value});`);
emitStatement(`SELECT setval('"${seq.sequence_name}"', ${val[0].last_value}, ${val[0].is_called ? "true" : "false"});`);
}
}
emit("");
}
emit("COMMIT;");
emitStatement("COMMIT;");
emit("");
// Write the backup file
@@ -340,7 +465,25 @@ export async function runDatabaseRestore(opts: RunDatabaseRestoreOptions): Promi
try {
await sql`SELECT 1`;
await sql.file(opts.backupFile).execute();
const contents = await readFile(opts.backupFile, "utf8");
const statements = contents
.split(STATEMENT_BREAKPOINT)
.map((statement) => statement.trim())
.filter((statement) => statement.length > 0);
for (const statement of statements) {
await sql.unsafe(statement).execute();
}
} catch (error) {
const statementPreview = typeof error === "object" && error !== null && typeof (error as Record<string, unknown>).query === "string"
? String((error as Record<string, unknown>).query)
.split(/\r?\n/)
.map((line) => line.trim())
.find((line) => line.length > 0 && !line.startsWith("--"))
: null;
throw new Error(
`Failed to restore ${basename(opts.backupFile)}: ${sanitizeRestoreErrorMessage(error)}${statementPreview ? ` [statement: ${statementPreview.slice(0, 120)}]` : ""}`,
);
} finally {
await sql.end();
}