diff --git a/AGENTS.md b/AGENTS.md index e4b5b514..dad6684f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -78,6 +78,9 @@ If you change schema/API behavior, update all impacted layers: 4. Do not replace strategic docs wholesale unless asked. Prefer additive updates. Keep `doc/SPEC.md` and `doc/SPEC-implementation.md` aligned. +5. Keep plan docs dated and centralized. +New plan documents belong in `doc/plans/` and should use `YYYY-MM-DD-slug.md` filenames. + ## 6. Database Change Workflow When changing data model: diff --git a/Dockerfile b/Dockerfile index 3fe1f2b2..014113e4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,7 @@ COPY packages/adapter-utils/package.json packages/adapter-utils/ COPY packages/adapters/claude-local/package.json packages/adapters/claude-local/ COPY packages/adapters/codex-local/package.json packages/adapters/codex-local/ COPY packages/adapters/cursor-local/package.json packages/adapters/cursor-local/ +COPY packages/adapters/gemini-local/package.json packages/adapters/gemini-local/ COPY packages/adapters/openclaw-gateway/package.json packages/adapters/openclaw-gateway/ COPY packages/adapters/opencode-local/package.json packages/adapters/opencode-local/ COPY packages/adapters/pi-local/package.json packages/adapters/pi-local/ diff --git a/cli/src/commands/client/agent.ts b/cli/src/commands/client/agent.ts index 36eb04e6..2c294628 100644 --- a/cli/src/commands/client/agent.ts +++ b/cli/src/commands/client/agent.ts @@ -1,5 +1,9 @@ import { Command } from "commander"; import type { Agent } from "@paperclipai/shared"; +import { + removeMaintainerOnlySkillSymlinks, + resolvePaperclipSkillsDir, +} from "@paperclipai/adapter-utils/server-utils"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; @@ -34,15 +38,12 @@ interface SkillsInstallSummary { tool: "codex" | "claude"; target: string; linked: string[]; + removed: string[]; skipped: string[]; failed: Array<{ name: string; error: string }>; } const 
__moduleDir = path.dirname(fileURLToPath(import.meta.url)); -const PAPERCLIP_SKILLS_CANDIDATES = [ - path.resolve(__moduleDir, "../../../../../skills"), // dev: cli/src/commands/client -> repo root/skills - path.resolve(process.cwd(), "skills"), -]; function codexSkillsHome(): string { const fromEnv = process.env.CODEX_HOME?.trim(); @@ -56,14 +57,6 @@ function claudeSkillsHome(): string { return path.join(base, "skills"); } -async function resolvePaperclipSkillsDir(): Promise { - for (const candidate of PAPERCLIP_SKILLS_CANDIDATES) { - const isDir = await fs.stat(candidate).then((s) => s.isDirectory()).catch(() => false); - if (isDir) return candidate; - } - return null; -} - async function installSkillsForTarget( sourceSkillsDir: string, targetSkillsDir: string, @@ -73,20 +66,65 @@ async function installSkillsForTarget( tool, target: targetSkillsDir, linked: [], + removed: [], skipped: [], failed: [], }; await fs.mkdir(targetSkillsDir, { recursive: true }); const entries = await fs.readdir(sourceSkillsDir, { withFileTypes: true }); + summary.removed = await removeMaintainerOnlySkillSymlinks( + targetSkillsDir, + entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name), + ); for (const entry of entries) { if (!entry.isDirectory()) continue; const source = path.join(sourceSkillsDir, entry.name); const target = path.join(targetSkillsDir, entry.name); const existing = await fs.lstat(target).catch(() => null); if (existing) { - summary.skipped.push(entry.name); - continue; + if (existing.isSymbolicLink()) { + let linkedPath: string | null = null; + try { + linkedPath = await fs.readlink(target); + } catch (err) { + await fs.unlink(target); + try { + await fs.symlink(source, target); + summary.linked.push(entry.name); + continue; + } catch (linkErr) { + summary.failed.push({ + name: entry.name, + error: + err instanceof Error && linkErr instanceof Error + ? `${err.message}; then ${linkErr.message}` + : err instanceof Error + ? 
err.message + : `Failed to recover broken symlink: ${String(err)}`, + }); + continue; + } + } + + const resolvedLinkedPath = path.isAbsolute(linkedPath) + ? linkedPath + : path.resolve(path.dirname(target), linkedPath); + const linkedTargetExists = await fs + .stat(resolvedLinkedPath) + .then(() => true) + .catch(() => false); + + if (!linkedTargetExists) { + await fs.unlink(target); + } else { + summary.skipped.push(entry.name); + continue; + } + } else { + summary.skipped.push(entry.name); + continue; + } } try { @@ -210,7 +248,7 @@ export function registerAgentCommands(program: Command): void { const installSummaries: SkillsInstallSummary[] = []; if (opts.installSkills !== false) { - const skillsDir = await resolvePaperclipSkillsDir(); + const skillsDir = await resolvePaperclipSkillsDir(__moduleDir, [path.resolve(process.cwd(), "skills")]); if (!skillsDir) { throw new Error( "Could not locate local Paperclip skills directory. Expected ./skills in the repo checkout.", @@ -258,7 +296,7 @@ export function registerAgentCommands(program: Command): void { if (installSummaries.length > 0) { for (const summary of installSummaries) { console.log( - `${summary.tool}: linked=${summary.linked.length} skipped=${summary.skipped.length} failed=${summary.failed.length} target=${summary.target}`, + `${summary.tool}: linked=${summary.linked.length} removed=${summary.removed.length} skipped=${summary.skipped.length} failed=${summary.failed.length} target=${summary.target}`, ); for (const failed of summary.failed) { console.log(` failed ${failed.name}: ${failed.error}`); diff --git a/cli/src/commands/worktree.ts b/cli/src/commands/worktree.ts index 4f0ed887..582bb5dd 100644 --- a/cli/src/commands/worktree.ts +++ b/cli/src/commands/worktree.ts @@ -83,6 +83,7 @@ type EmbeddedPostgresCtor = new (opts: { password: string; port: number; persistent: boolean; + initdbFlags?: string[]; onLog?: (message: unknown) => void; onError?: (message: unknown) => void; }) => EmbeddedPostgresInstance; 
@@ -127,6 +128,8 @@ function isCurrentSourceConfigPath(sourceConfigPath: string): boolean { return path.resolve(currentConfigPath) === path.resolve(sourceConfigPath); } +const WORKTREE_NAME_PREFIX = "paperclip-"; + function resolveWorktreeMakeName(name: string): string { const value = nonEmpty(name); if (!value) { @@ -137,7 +140,15 @@ function resolveWorktreeMakeName(name: string): string { "Worktree name must contain only letters, numbers, dots, underscores, or dashes.", ); } - return value; + return value.startsWith(WORKTREE_NAME_PREFIX) ? value : `${WORKTREE_NAME_PREFIX}${value}`; +} + +function resolveWorktreeHome(explicit?: string): string { + return nonEmpty(explicit) ?? nonEmpty(process.env.PAPERCLIP_WORKTREES_DIR) ?? DEFAULT_WORKTREE_HOME; // blank --home / blank env var must fall through to the default: an empty home would resolve to the cwd, which worktree:cleanup later deletes from +} + +function resolveWorktreeStartPoint(explicit?: string): string | undefined { + return explicit ?? nonEmpty(process.env.PAPERCLIP_WORKTREE_START_POINT) ?? undefined; } export function resolveWorktreeMakeTargetPath(name: string): string { @@ -514,6 +525,7 @@ async function ensureEmbeddedPostgres(dataDir: string, preferredPort: number): P password: "paperclip", port, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: () => {}, onError: () => {}, }); @@ -622,7 +634,7 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise { const instanceId = sanitizeWorktreeInstanceId(opts.instance ?? name); const paths = resolveWorktreeLocalPaths({ cwd, - homeDir: opts.home ?? 
DEFAULT_WORKTREE_HOME, + homeDir: resolveWorktreeHome(opts.home), instanceId, }); const sourceConfigPath = resolveSourceConfigPath(opts); @@ -731,6 +743,7 @@ export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOpt p.intro(pc.bgCyan(pc.black(" paperclipai worktree:make "))); const name = resolveWorktreeMakeName(nameArg); + const startPoint = resolveWorktreeStartPoint(opts.startPoint); const sourceCwd = process.cwd(); const targetPath = resolveWorktreeMakeTargetPath(name); if (existsSync(targetPath)) { @@ -738,8 +751,8 @@ export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOpt } mkdirSync(path.dirname(targetPath), { recursive: true }); - if (opts.startPoint) { - const [remote] = opts.startPoint.split("/", 1); + if (startPoint) { + const [remote] = startPoint.split("/", 1); try { execFileSync("git", ["fetch", remote], { cwd: sourceCwd, @@ -755,8 +768,8 @@ export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOpt const worktreeArgs = resolveGitWorktreeAddArgs({ branchName: name, targetPath, - branchExists: !opts.startPoint && localBranchExists(sourceCwd, name), - startPoint: opts.startPoint, + branchExists: !startPoint && localBranchExists(sourceCwd, name), + startPoint, }); const spinner = p.spinner(); @@ -799,6 +812,232 @@ export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOpt } } +type WorktreeCleanupOptions = { + instance?: string; + home?: string; + force?: boolean; +}; + +type GitWorktreeListEntry = { + worktree: string; + branch: string | null; + bare: boolean; + detached: boolean; +}; + +function parseGitWorktreeList(cwd: string): GitWorktreeListEntry[] { + const raw = execFileSync("git", ["worktree", "list", "--porcelain"], { + cwd, + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + }); + const entries: GitWorktreeListEntry[] = []; + let current: Partial = {}; + for (const line of raw.split("\n")) { + if (line.startsWith("worktree ")) { + current = { 
worktree: line.slice("worktree ".length) }; + } else if (line.startsWith("branch ")) { + current.branch = line.slice("branch ".length); + } else if (line === "bare") { + current.bare = true; + } else if (line === "detached") { + current.detached = true; + } else if (line === "" && current.worktree) { + entries.push({ + worktree: current.worktree, + branch: current.branch ?? null, + bare: current.bare ?? false, + detached: current.detached ?? false, + }); + current = {}; + } + } + if (current.worktree) { + entries.push({ + worktree: current.worktree, + branch: current.branch ?? null, + bare: current.bare ?? false, + detached: current.detached ?? false, + }); + } + return entries; +} + +function branchHasUniqueCommits(cwd: string, branchName: string): boolean { + try { + const output = execFileSync( + "git", + ["log", "--oneline", branchName, "--not", "--remotes", "--exclude", `refs/heads/${branchName}`, "--branches"], + { cwd, encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] }, + ).trim(); + return output.length > 0; + } catch { + return false; + } +} + +function branchExistsOnAnyRemote(cwd: string, branchName: string): boolean { + try { + const output = execFileSync( + "git", + ["branch", "-r", "--list", `*/${branchName}`], + { cwd, encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] }, + ).trim(); + return output.length > 0; + } catch { + return false; + } +} + +function worktreePathHasUncommittedChanges(worktreePath: string): boolean { + try { + const output = execFileSync( + "git", + ["status", "--porcelain"], + { cwd: worktreePath, encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] }, + ).trim(); + return output.length > 0; + } catch { + return false; + } +} + +export async function worktreeCleanupCommand(nameArg: string, opts: WorktreeCleanupOptions): Promise { + printPaperclipCliBanner(); + p.intro(pc.bgCyan(pc.black(" paperclipai worktree:cleanup "))); + + const name = resolveWorktreeMakeName(nameArg); + const sourceCwd = process.cwd(); + const 
targetPath = resolveWorktreeMakeTargetPath(name); + const instanceId = sanitizeWorktreeInstanceId(opts.instance ?? name); + const homeDir = path.resolve(expandHomePrefix(resolveWorktreeHome(opts.home))); + const instanceRoot = path.resolve(homeDir, "instances", instanceId); + + // ── 1. Assess current state ────────────────────────────────────────── + + const hasBranch = localBranchExists(sourceCwd, name); + const hasTargetDir = existsSync(targetPath); + const hasInstanceData = existsSync(instanceRoot); + + const worktrees = parseGitWorktreeList(sourceCwd); + const linkedWorktree = worktrees.find( + (wt) => wt.branch === `refs/heads/${name}` || path.resolve(wt.worktree) === path.resolve(targetPath), + ); + + if (!hasBranch && !hasTargetDir && !hasInstanceData && !linkedWorktree) { + p.log.info("Nothing to clean up — no branch, worktree directory, or instance data found."); + p.outro(pc.green("Already clean.")); + return; + } + + // ── 2. Safety checks ──────────────────────────────────────────────── + + const problems: string[] = []; + + if (hasBranch && branchHasUniqueCommits(sourceCwd, name)) { + const onRemote = branchExistsOnAnyRemote(sourceCwd, name); + if (onRemote) { + p.log.info( + `Branch "${name}" has unique local commits, but the branch also exists on a remote — safe to delete locally.`, + ); + } else { + problems.push( + `Branch "${name}" has commits not found on any other branch or remote. ` + + `Deleting it will lose work. Push it first, or use --force.`, + ); + } + } + + if (hasTargetDir && worktreePathHasUncommittedChanges(targetPath)) { + problems.push( + `Worktree directory ${targetPath} has uncommitted changes. Commit or stash first, or use --force.`, + ); + } + + if (problems.length > 0 && !opts.force) { + for (const problem of problems) { + p.log.error(problem); + } + throw new Error("Safety checks failed. 
Resolve the issues above or re-run with --force."); + } + if (problems.length > 0 && opts.force) { + for (const problem of problems) { + p.log.warning(`Overridden by --force: ${problem}`); + } + } + + // ── 3. Clean up (idempotent steps) ────────────────────────────────── + + // 3a. Remove the git worktree registration + if (linkedWorktree) { + const worktreeDirExists = existsSync(linkedWorktree.worktree); + const spinner = p.spinner(); + if (worktreeDirExists) { + spinner.start(`Removing git worktree at ${linkedWorktree.worktree}...`); + try { + const removeArgs = ["worktree", "remove", linkedWorktree.worktree]; + if (opts.force) removeArgs.push("--force"); + execFileSync("git", removeArgs, { + cwd: sourceCwd, + stdio: ["ignore", "pipe", "pipe"], + }); + spinner.stop(`Removed git worktree at ${linkedWorktree.worktree}.`); + } catch (error) { + spinner.stop(pc.yellow(`Could not remove worktree cleanly, will prune instead.`)); + p.log.warning(extractExecSyncErrorMessage(error) ?? String(error)); + } + } else { + spinner.start("Pruning stale worktree entry..."); + execFileSync("git", ["worktree", "prune"], { + cwd: sourceCwd, + stdio: ["ignore", "pipe", "pipe"], + }); + spinner.stop("Pruned stale worktree entry."); + } + } else { + // Even without a linked worktree, prune to clean up any orphaned entries + execFileSync("git", ["worktree", "prune"], { + cwd: sourceCwd, + stdio: ["ignore", "pipe", "pipe"], + }); + } + + // 3b. Remove the worktree directory if it still exists (e.g. partial creation) + if (existsSync(targetPath)) { + const spinner = p.spinner(); + spinner.start(`Removing worktree directory ${targetPath}...`); + rmSync(targetPath, { recursive: true, force: true }); + spinner.stop(`Removed worktree directory ${targetPath}.`); + } + + // 3c. 
Delete the local branch (now safe — worktree is gone) + if (localBranchExists(sourceCwd, name)) { + const spinner = p.spinner(); + spinner.start(`Deleting local branch "${name}"...`); + try { + const deleteFlag = opts.force ? "-D" : "-d"; + execFileSync("git", ["branch", deleteFlag, name], { + cwd: sourceCwd, + stdio: ["ignore", "pipe", "pipe"], + }); + spinner.stop(`Deleted local branch "${name}".`); + } catch (error) { + spinner.stop(pc.yellow(`Could not delete branch "${name}".`)); + p.log.warning(extractExecSyncErrorMessage(error) ?? String(error)); + } + } + + // 3d. Remove instance data + if (existsSync(instanceRoot)) { + const spinner = p.spinner(); + spinner.start(`Removing instance data at ${instanceRoot}...`); + rmSync(instanceRoot, { recursive: true, force: true }); + spinner.stop(`Removed instance data at ${instanceRoot}.`); + } + + p.outro(pc.green("Cleanup complete.")); +} + export async function worktreeEnvCommand(opts: WorktreeEnvOptions): Promise { const configPath = resolveConfigPath(opts.config); const envPath = resolvePaperclipEnvFile(configPath); @@ -825,10 +1064,10 @@ export function registerWorktreeCommands(program: Command): void { program .command("worktree:make") .description("Create ~/NAME as a git worktree, then initialize an isolated Paperclip instance inside it") - .argument("", "Worktree directory and branch name (created at ~/NAME)") - .option("--start-point ", "Remote ref to base the new branch on (e.g. 
origin/main)") + .argument("", "Worktree name — auto-prefixed with paperclip- if needed (created at ~/paperclip-NAME)") + .option("--start-point ", "Remote ref to base the new branch on (env: PAPERCLIP_WORKTREE_START_POINT)") .option("--instance ", "Explicit isolated instance id") - .option("--home ", `Home root for worktree instances (default: ${DEFAULT_WORKTREE_HOME})`) + .option("--home ", `Home root for worktree instances (env: PAPERCLIP_WORKTREES_DIR, default: ${DEFAULT_WORKTREE_HOME})`) .option("--from-config ", "Source config.json to seed from") .option("--from-data-dir ", "Source PAPERCLIP_HOME used when deriving the source config") .option("--from-instance ", "Source instance id when deriving the source config", "default") @@ -844,7 +1083,7 @@ export function registerWorktreeCommands(program: Command): void { .description("Create repo-local config/env and an isolated instance for this worktree") .option("--name ", "Display name used to derive the instance id") .option("--instance ", "Explicit isolated instance id") - .option("--home ", `Home root for worktree instances (default: ${DEFAULT_WORKTREE_HOME})`) + .option("--home ", `Home root for worktree instances (env: PAPERCLIP_WORKTREES_DIR, default: ${DEFAULT_WORKTREE_HOME})`) .option("--from-config ", "Source config.json to seed from") .option("--from-data-dir ", "Source PAPERCLIP_HOME used when deriving the source config") .option("--from-instance ", "Source instance id when deriving the source config", "default") @@ -861,4 +1100,13 @@ export function registerWorktreeCommands(program: Command): void { .option("-c, --config ", "Path to config file") .option("--json", "Print JSON instead of shell exports") .action(worktreeEnvCommand); + + program + .command("worktree:cleanup") + .description("Safely remove a worktree, its branch, and its isolated instance data") + .argument("", "Worktree name — auto-prefixed with paperclip- if needed") + .option("--instance ", "Explicit instance id (if different from the 
worktree name)") + .option("--home ", `Home root for worktree instances (env: PAPERCLIP_WORKTREES_DIR, default: ${DEFAULT_WORKTREE_HOME})`) + .option("--force", "Bypass safety checks (uncommitted changes, unique commits)", false) + .action(worktreeCleanupCommand); } diff --git a/doc/PRODUCT.md b/doc/PRODUCT.md index 741df662..f835889c 100644 --- a/doc/PRODUCT.md +++ b/doc/PRODUCT.md @@ -94,3 +94,53 @@ Canonical mode design and command expectations live in `doc/DEPLOYMENT-MODES.md` ## Further Detail See [SPEC.md](./SPEC.md) for the full technical specification and [TASKS.md](./TASKS.md) for the task management data model. + +--- + +Paperclip’s core identity is a **control plane for autonomous AI companies**, centered on **companies, org charts, goals, issues/comments, heartbeats, budgets, approvals, and board governance**. The public docs are also explicit about the current boundaries: **tasks/comments are the built-in communication model**, Paperclip is **not a chatbot**, and it is **not a code review tool**. The roadmap already points toward **easier onboarding, cloud agents, easier agent configuration, plugins, better docs, and ClipMart/ClipHub-style reusable companies/templates**. + +## What Paperclip should do vs. not do + +**Do** + +- Stay **board-level and company-level**. Users should manage goals, orgs, budgets, approvals, and outputs. +- Make the first five minutes feel magical: install, answer a few questions, see a CEO do something real. +- Keep work anchored to **issues/comments/projects/goals**, even if the surface feels conversational. +- Treat **agency / internal team / startup** as the same underlying abstraction with different templates and labels. +- Make outputs first-class: files, docs, reports, previews, links, screenshots. +- Provide **hooks into engineering workflows**: worktrees, preview servers, PR links, external review tools. +- Use **plugins** for edge cases like rich chat, knowledge bases, doc editors, custom tracing. 
+ +**Do not** + +- Do not make the core product a general chat app. The current product definition is explicitly task/comment-centric and “not a chatbot,” and that boundary is valuable. +- Do not build a complete Jira/GitHub replacement. The repo/docs already position Paperclip as organization orchestration, not focused on pull-request review. +- Do not build enterprise-grade RBAC first. The current V1 spec still treats multi-board governance and fine-grained human permissions as out of scope, so the first multi-user version should be coarse and company-scoped. +- Do not lead with raw bash logs and transcripts. Default view should be human-readable intent/progress, with raw detail beneath. +- Do not force users to understand provider/API-key plumbing unless absolutely necessary. There are active onboarding/auth issues already; friction here is clearly real. + +## Specific design goals + +1. **Time-to-first-success under 5 minutes** + A fresh user should go from install to “my CEO completed a first task” in one sitting. + +2. **Board-level abstraction always wins** + The default UI should answer: what is the company doing, who is doing it, why does it matter, what did it cost, and what needs my approval. + +3. **Conversation stays attached to work objects** + “Chat with CEO” should still resolve to strategy threads, decisions, tasks, or approvals. + +4. **Progressive disclosure** + Top layer: human-readable summary. Middle layer: checklist/steps/artifacts. Bottom layer: raw logs/tool calls/transcript. + +5. **Output-first** + Work is not done until the user can see the result: file, document, preview link, screenshot, plan, or PR. + +6. **Local-first, cloud-ready** + The mental model should not change between local solo use and shared/private or public/cloud deployment. + +7. **Safe autonomy** + Auto mode is allowed; hidden token burn is not. + +8. 
**Thin core, rich edges** + Put optional chat, knowledge, and special surfaces into plugins/extensions rather than bloating the control plane. diff --git a/doc/plans/module-system.md b/doc/plans/2026-02-16-module-system.md similarity index 100% rename from doc/plans/module-system.md rename to doc/plans/2026-02-16-module-system.md diff --git a/doc/plans/agent-authentication-implementation.md b/doc/plans/2026-02-18-agent-authentication-implementation.md similarity index 100% rename from doc/plans/agent-authentication-implementation.md rename to doc/plans/2026-02-18-agent-authentication-implementation.md diff --git a/doc/plans/agent-authentication.md b/doc/plans/2026-02-18-agent-authentication.md similarity index 100% rename from doc/plans/agent-authentication.md rename to doc/plans/2026-02-18-agent-authentication.md diff --git a/doc/plans/agent-mgmt-followup-plan.md b/doc/plans/2026-02-19-agent-mgmt-followup-plan.md similarity index 100% rename from doc/plans/agent-mgmt-followup-plan.md rename to doc/plans/2026-02-19-agent-mgmt-followup-plan.md diff --git a/doc/plans/ceo-agent-creation-and-hiring.md b/doc/plans/2026-02-19-ceo-agent-creation-and-hiring.md similarity index 100% rename from doc/plans/ceo-agent-creation-and-hiring.md rename to doc/plans/2026-02-19-ceo-agent-creation-and-hiring.md diff --git a/doc/plans/issue-run-orchestration-plan.md b/doc/plans/2026-02-20-issue-run-orchestration-plan.md similarity index 100% rename from doc/plans/issue-run-orchestration-plan.md rename to doc/plans/2026-02-20-issue-run-orchestration-plan.md diff --git a/doc/plans/storage-system-implementation.md b/doc/plans/2026-02-20-storage-system-implementation.md similarity index 100% rename from doc/plans/storage-system-implementation.md rename to doc/plans/2026-02-20-storage-system-implementation.md diff --git a/doc/plan/humans-and-permissions-implementation.md b/doc/plans/2026-02-21-humans-and-permissions-implementation.md similarity index 100% rename from 
doc/plan/humans-and-permissions-implementation.md rename to doc/plans/2026-02-21-humans-and-permissions-implementation.md diff --git a/doc/plan/humans-and-permissions.md b/doc/plans/2026-02-21-humans-and-permissions.md similarity index 100% rename from doc/plan/humans-and-permissions.md rename to doc/plans/2026-02-21-humans-and-permissions.md diff --git a/doc/plans/cursor-cloud-adapter.md b/doc/plans/2026-02-23-cursor-cloud-adapter.md similarity index 100% rename from doc/plans/cursor-cloud-adapter.md rename to doc/plans/2026-02-23-cursor-cloud-adapter.md diff --git a/doc/plans/deployment-auth-mode-consolidation.md b/doc/plans/2026-02-23-deployment-auth-mode-consolidation.md similarity index 100% rename from doc/plans/deployment-auth-mode-consolidation.md rename to doc/plans/2026-02-23-deployment-auth-mode-consolidation.md diff --git a/doc/plans/workspace-strategy-and-git-worktrees.md b/doc/plans/2026-03-10-workspace-strategy-and-git-worktrees.md similarity index 100% rename from doc/plans/workspace-strategy-and-git-worktrees.md rename to doc/plans/2026-03-10-workspace-strategy-and-git-worktrees.md diff --git a/doc/plans/2026-03-11-agent-chat-ui-and-issue-backed-conversations.md b/doc/plans/2026-03-11-agent-chat-ui-and-issue-backed-conversations.md new file mode 100644 index 00000000..7364b6d0 --- /dev/null +++ b/doc/plans/2026-03-11-agent-chat-ui-and-issue-backed-conversations.md @@ -0,0 +1,329 @@ +# Agent Chat UI and Issue-Backed Conversations + +## Context + +`PAP-475` asks two related questions: + +1. What UI kit should Paperclip use if we add a chat surface with an agent? +2. How should chat fit the product without breaking the current issue-centric model? + +This is not only a component-library decision. In Paperclip today: + +- V1 explicitly says communication is `tasks + comments only`, with no separate chat system. +- Issues already carry assignment, audit trail, billing code, project linkage, goal linkage, and active run linkage. 
+- Live run streaming already exists on issue detail pages. +- Agent sessions already persist by `taskKey`, and today `taskKey` falls back to `issueId`. +- The OpenClaw gateway adapter already supports an issue-scoped session key strategy. + +That means the cheapest useful path is not "add a second messaging product inside Paperclip." It is "add a better conversational UI on top of issue and run primitives we already have." + +## Current Constraints From the Codebase + +### Durable work object + +The durable object in Paperclip is the issue, not a chat thread. + +- `IssueDetail` already combines comments, linked runs, live runs, and activity into one timeline. +- `CommentThread` already renders markdown comments and supports reply/reassignment flows. +- `LiveRunWidget` already renders streaming assistant/tool/system output for active runs. + +### Session behavior + +Session continuity is already task-shaped. + +- `heartbeat.ts` derives `taskKey` from `taskKey`, then `taskId`, then `issueId`. +- `agent_task_sessions` stores session state per company + agent + adapter + task key. +- OpenClaw gateway supports `sessionKeyStrategy=issue|fixed|run`, and `issue` already matches the Paperclip mental model well. + +That means "chat with the CEO about this issue" naturally maps to one durable session per issue today without inventing a second session system. + +### Billing behavior + +Billing is already issue-aware. + +- `cost_events` can attach to `issueId`, `projectId`, `goalId`, and `billingCode`. +- heartbeat context already propagates issue linkage into runs and cost rollups. + +If chat leaves the issue model, Paperclip would need a second billing story. That is avoidable. + +## UI Kit Recommendation + +## Recommendation: `assistant-ui` + +Use `assistant-ui` as the chat presentation layer. + +Why it fits Paperclip: + +- It is a real chat UI kit, not just a hook. +- It is composable and aligned with shadcn-style primitives, which matches the current UI stack well. 
+- It explicitly supports custom backends, which matters because Paperclip talks to agents through issue comments, heartbeats, and run streams rather than direct provider calls. +- It gives us polished chat affordances quickly: message list, composer, streaming text, attachments, thread affordances, and markdown-oriented rendering. + +Why not make "the Vercel one" the primary choice: + +- Vercel AI SDK is stronger today than the older "just `useChat` over `/api/chat`" framing. Its transport layer is flexible and can support custom protocols. +- But AI SDK is still better understood here as a transport/runtime protocol layer than as the best end-user chat surface for Paperclip. +- Paperclip does not need Vercel to own message state, persistence, or the backend contract. Paperclip already has its own issue, run, and session model. + +So the clean split is: + +- `assistant-ui` for UI primitives +- Paperclip-owned runtime/store for state, persistence, and transport +- optional AI SDK usage later only if we want its stream protocol or client transport abstraction + +## Product Options + +### Option A: Separate chat object + +Create a new top-level chat/thread model unrelated to issues. + +Pros: + +- clean mental model if users want freeform conversation +- easy to hide from issue boards + +Cons: + +- breaks the current V1 product decision that communication is issue-centric +- needs new persistence, billing, session, permissions, activity, and wakeup rules +- creates a second "why does this exist?" object beside issues +- makes "pick up an old chat" a separate retrieval problem + +Verdict: not recommended for V1. + +### Option B: Every chat is an issue + +Treat chat as a UI mode over an issue. The issue remains the durable record. 
+ +Pros: + +- matches current product spec +- billing, runs, comments, approvals, and activity already work +- sessions already resume on issue identity +- works with all adapters, including OpenClaw, without new agent auth or a second API surface + +Cons: + +- some chats are not really "tasks" in a board sense +- onboarding and review conversations may clutter normal issue lists + +Verdict: best V1 foundation. + +### Option C: Hybrid with hidden conversation issues + +Back every conversation with an issue, but allow a conversation-flavored issue mode that is hidden from default execution boards unless promoted. + +Pros: + +- preserves the issue-centric backend +- gives onboarding/review chat a cleaner UX +- preserves billing and session continuity + +Cons: + +- requires extra UI rules and possibly a small schema or filtering addition +- can become a disguised second system if not kept narrow + +Verdict: likely the right product shape after a basic issue-backed MVP. + +## Recommended Product Model + +### Phase 1 product decision + +For the first implementation, chat should be issue-backed. + +More specifically: + +- the board opens a chat surface for an issue +- sending a message is a comment mutation on that issue +- the assigned agent is woken through the existing issue-comment flow +- streaming output comes from the existing live run stream for that issue +- durable assistant output remains comments and run history, not an extra transcript store + +This keeps Paperclip honest about what it is: + +- the control plane stays issue-centric +- chat is a better way to interact with issue work, not a new collaboration product + +### Onboarding and CEO conversations + +For onboarding, weekly reviews, and "chat with the CEO", use a conversation issue rather than a global chat tab. 
+ +Suggested shape: + +- create a board-initiated issue assigned to the CEO +- mark it as conversation-flavored in UI treatment +- optionally hide it from normal issue boards by default later +- keep all cost/run/session linkage on that issue + +This solves several concerns at once: + +- no separate API key or direct provider wiring is needed +- the same CEO adapter is used +- old conversations are recovered through normal issue history +- the CEO can still create or update real child issues from the conversation + +## Session Model + +### V1 + +Use one durable conversation session per issue. + +That already matches current behavior: + +- adapter task sessions persist against `taskKey` +- `taskKey` already falls back to `issueId` +- OpenClaw already supports an issue-scoped session key + +This means "resume the CEO conversation later" works by reopening the same issue and waking the same agent on the same issue. + +### What not to add yet + +Do not add multi-thread-per-issue chat in the first pass. + +If Paperclip later needs several parallel threads on one issue, then add an explicit conversation identity and derive: + +- `taskKey = issue:<issueId>:conversation:<conversationId>` +- OpenClaw `sessionKey = paperclip:conversation:<conversationId>` + +Until that requirement becomes real, one issue == one durable conversation is the simpler and better rule. + +## Billing Model + +Chat should not invent a separate billing pipeline. + +All chat cost should continue to roll up through the issue: + +- `cost_events.issueId` +- project and goal rollups through existing relationships +- issue `billingCode` when present + +If a conversation is important enough to exist, it is important enough to have a durable issue-backed audit and cost trail. + +This is another reason ephemeral freeform chat should not be the default. + +## UI Architecture + +### Recommended stack + +1. Keep Paperclip as the source of truth for message history and run state. +2. Add `assistant-ui` as the rendering/composer layer. +3. 
Build a Paperclip runtime adapter that maps: + - issue comments -> user/assistant messages + - live run deltas -> streaming assistant messages + - issue attachments -> chat attachments +4. Keep current markdown rendering and code-block support where possible. + +### Interaction flow + +1. Board opens issue detail in "Chat" mode. +2. Existing comment history is mapped into chat messages. +3. When the board sends a message: + - `POST /api/issues/{id}/comments` + - optionally interrupt the active run if the UX wants "send and replace current response" +4. Existing issue comment wakeup logic wakes the assignee. +5. Existing `/issues/{id}/live-runs` and `/issues/{id}/active-run` data feeds drive streaming. +6. When the run completes, durable state remains in comments/runs/activity as it does now. + +### Why this fits the current code + +Paperclip already has most of the backend pieces: + +- issue comments +- run timeline +- run log and event streaming +- markdown rendering +- attachment support +- assignee wakeups on comments + +The missing piece is mostly the presentation and the mapping layer, not a new backend domain. + +## Agent Scope + +Do not launch this as "chat with every agent." + +Start narrower: + +- onboarding chat with CEO +- workflow/review chat with CEO +- maybe selected exec roles later + +Reasons: + +- it keeps the feature from becoming a second inbox/chat product +- it limits permission and UX questions early +- it matches the stated product demand + +If direct chat with other agents becomes useful later, the same issue-backed pattern can expand cleanly. + +## Recommended Delivery Phases + +### Phase 1: Chat UI on existing issues + +- add a chat presentation mode to issue detail +- use `assistant-ui` +- map comments + live runs into the chat surface +- no schema change +- no new API surface + +This is the highest-leverage step because it tests whether the UX is actually useful before product model expansion. 
+ +### Phase 2: Conversation-flavored issues for CEO chat + +- add a lightweight conversation classification +- support creation of CEO conversation issues from onboarding and workflow entry points +- optionally hide these from normal backlog/board views by default + +The smallest implementation could be a label or issue metadata flag. If it becomes important enough, then promote it to a first-class issue subtype later. + +### Phase 3: Promotion and thread splitting only if needed + +Only if we later see a real need: + +- allow promoting a conversation to a formal task issue +- allow several threads per issue with explicit conversation identity + +This should be demand-driven, not designed up front. + +## Clear Recommendation + +If the question is "what should we use?", the answer is: + +- use `assistant-ui` for the chat UI +- do not treat raw Vercel AI SDK UI hooks as the main product answer +- keep chat issue-backed in V1 +- use the current issue comment + run + session + billing model rather than inventing a parallel chat subsystem + +If the question is "how should we think about chat in Paperclip?", the answer is: + +- chat is a mode of interacting with issue-backed agent work +- not a separate product silo +- not an excuse to stop tracing work, cost, and session history back to the issue + +## Implementation Notes + +### Immediate implementation target + +The most defensible first build is: + +- add a chat tab or chat-focused layout on issue detail +- back it with the currently assigned agent on that issue +- use `assistant-ui` primitives over existing comments and live run events + +### Defer these until proven necessary + +- standalone global chat objects +- multi-thread chat inside one issue +- chat with every agent in the org +- a second persistence layer for message history +- separate cost tracking for chats + +## References + +- V1 communication model: `doc/SPEC-implementation.md` +- Current issue/comment/run UI: `ui/src/pages/IssueDetail.tsx`, 
`ui/src/components/CommentThread.tsx`, `ui/src/components/LiveRunWidget.tsx` +- Session persistence and task key derivation: `server/src/services/heartbeat.ts`, `packages/db/src/schema/agent_task_sessions.ts` +- OpenClaw session routing: `packages/adapters/openclaw-gateway/README.md` +- assistant-ui docs: https://www.assistant-ui.com/docs +- assistant-ui repo: https://github.com/assistant-ui/assistant-ui +- AI SDK transport docs: https://ai-sdk.dev/docs/ai-sdk-ui/transport diff --git a/doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md b/doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md new file mode 100644 index 00000000..678444ac --- /dev/null +++ b/doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md @@ -0,0 +1,383 @@ +# Token Optimization Plan + +Date: 2026-03-13 +Related discussion: https://github.com/paperclipai/paperclip/discussions/449 + +## Goal + +Reduce token consumption materially without reducing agent capability, control-plane visibility, or task completion quality. + +This plan is based on: + +- the current V1 control-plane design +- the current adapter and heartbeat implementation +- the linked user discussion +- local runtime data from the default Paperclip instance on 2026-03-13 + +## Executive Summary + +The discussion is directionally right about two things: + +1. We should preserve session and prompt-cache locality more aggressively. +2. We should separate stable startup instructions from per-heartbeat dynamic context. + +But that is not enough on its own. + +After reviewing the code and local run data, the token problem appears to have four distinct causes: + +1. **Measurement inflation on sessioned adapters.** Some token counters, especially for `codex_local`, appear to be recorded as cumulative session totals instead of per-heartbeat deltas. +2. **Avoidable session resets.** Task sessions are intentionally reset on timer wakes and manual wakes, which destroys cache locality for common heartbeat paths. +3. **Repeated context reacquisition.** The `paperclip` skill tells agents to re-fetch assignments, issue details, ancestors, and full comment threads on every heartbeat.
The API does not currently offer efficient delta-oriented alternatives. +4. **Large static instruction surfaces.** Agent instruction files and globally injected skills are reintroduced at startup even when most of that content is unchanged and not needed for the current task. + +The correct approach is: + +1. fix telemetry so we can trust the numbers +2. preserve reuse where it is safe +3. make context retrieval incremental +4. add session compaction/rotation so long-lived sessions do not become progressively more expensive + +## Validated Findings + +### 1. Token telemetry is at least partly overstated today + +Observed from the local default instance: + +- `heartbeat_runs`: 11,360 runs between 2026-02-18 and 2026-03-13 +- summed `usage_json.inputTokens`: `2,272,142,368,952` +- summed `usage_json.cachedInputTokens`: `2,217,501,559,420` + +Those totals are not credible as true per-heartbeat usage for the observed prompt sizes. + +Supporting evidence: + +- `adapter.invoke.payload.prompt` averages were small: + - `codex_local`: ~193 chars average, 6,067 chars max + - `claude_local`: ~160 chars average, 1,160 chars max +- despite that, many `codex_local` runs report millions of input tokens +- one reused Codex session in local data spans 3,607 runs and recorded `inputTokens` growing up to `1,155,283,166` + +Interpretation: + +- for sessioned adapters, especially Codex, we are likely storing usage reported by the runtime as a **session total**, not a **per-run delta** +- this makes trend reporting, optimization work, and customer trust worse + +This does **not** mean there is no real token problem. It means we need a trustworthy baseline before we can judge optimization impact. + +### 2. 
Timer wakes currently throw away reusable task sessions + +In `server/src/services/heartbeat.ts`, `shouldResetTaskSessionForWake(...)` returns `true` for: + +- `wakeReason === "issue_assigned"` +- `wakeSource === "timer"` +- manual on-demand wakes + +That means many normal heartbeats skip saved task-session resume even when the workspace is stable. + +Local data supports the impact: + +- `timer/system` runs: 6,587 total +- only 976 had a previous session +- only 963 ended with the same session + +So timer wakes are the largest heartbeat path and are mostly not resuming prior task state. + +### 3. We repeatedly ask agents to reload the same task context + +The `paperclip` skill currently tells agents to do this on essentially every heartbeat: + +- fetch assignments +- fetch issue details +- fetch ancestor chain +- fetch full issue comments + +Current API shape reinforces that pattern: + +- `GET /api/issues/:id/comments` returns the full thread +- there is no `since`, cursor, digest, or summary endpoint for heartbeat consumption +- `GET /api/issues/:id` returns full enriched issue context, not a minimal delta payload + +This is safe but expensive. It forces the model to repeatedly consume unchanged information. + +### 4. Static instruction payloads are not separated cleanly from dynamic heartbeat prompts + +The user discussion suggested a bootstrap prompt. That is the right direction. + +Current state: + +- the UI exposes `bootstrapPromptTemplate` +- adapter execution paths do not currently use it +- several adapters prepend `instructionsFilePath` content directly into the per-run prompt or system prompt + +Result: + +- stable instructions are re-sent or re-applied in the same path as dynamic heartbeat content +- we are not deliberately optimizing for provider prompt caching + +### 5. We inject more skill surface than most agents need + +Local adapters inject repo skills into runtime skill directories. 
+ +Current repo skill sizes: + +- `skills/paperclip/SKILL.md`: 17,441 bytes +- `skills/create-agent-adapter/SKILL.md`: 31,832 bytes +- `skills/paperclip-create-agent/SKILL.md`: 4,718 bytes +- `skills/para-memory-files/SKILL.md`: 3,978 bytes + +That is nearly 58 KB of skill markdown before any company-specific instructions. + +Not all of that is necessarily loaded into model context every run, but it increases startup surface area and should be treated as a token budget concern. + +## Principles + +We should optimize tokens under these rules: + +1. **Do not lose functionality.** Agents must still be able to resume work safely, understand why tasks exist, and act within governance rules. +2. **Prefer stable context over repeated context.** Unchanged instructions should not be resent through the most expensive path. +3. **Prefer deltas over full reloads.** Heartbeats should consume only what changed since the last useful run. +4. **Measure normalized deltas, not raw adapter claims.** Especially for sessioned CLIs. +5. **Keep escape hatches.** Board/manual runs may still want a forced fresh session. + +## Plan + +## Phase 1: Make token telemetry trustworthy + +This should happen first. + +### Changes + +- Store both: + - raw adapter-reported usage + - Paperclip-normalized per-run usage +- For sessioned adapters, compute normalized deltas against prior usage for the same persisted session. +- Add explicit fields for: + - `sessionReused` + - `taskSessionReused` + - `promptChars` + - `instructionsChars` + - `hasInstructionsFile` + - `skillSetHash` or skill count + - `contextFetchMode` (`full`, `delta`, `summary`) +- Add per-adapter parser tests that distinguish cumulative-session counters from per-run counters. + +### Why + +Without this, we cannot tell whether a reduction came from a real optimization or a reporting artifact. 
+ +### Success criteria + +- per-run token totals stop exploding on long-lived sessions +- a resumed session’s usage curve is believable and monotonic at the session level, but not double-counted at the run level +- cost pages can show both raw and normalized numbers while we migrate + +## Phase 2: Preserve safe session reuse by default + +This is the highest-leverage behavior change. + +### Changes + +- Stop resetting task sessions on ordinary timer wakes. +- Keep resetting on: + - explicit manual “fresh run” invocations + - assignment changes + - workspace mismatch + - model mismatch / invalid resume errors +- Add an explicit wake flag like `forceFreshSession: true` when the board wants a reset. +- Record why a session was reused or reset in run metadata. + +### Why + +Timer wakes are the dominant heartbeat path. Resetting them destroys both session continuity and prompt cache reuse. + +### Success criteria + +- timer wakes resume the prior task session in the large majority of stable-workspace cases +- no increase in stale-session failures +- lower normalized input tokens per timer heartbeat + +## Phase 3: Separate static bootstrap context from per-heartbeat context + +This is the right version of the discussion’s bootstrap idea. + +### Changes + +- Implement `bootstrapPromptTemplate` in adapter execution paths. +- Use it only when starting a fresh session, not on resumed sessions. +- Keep `promptTemplate` intentionally small and stable: + - who I am + - what triggered this wake + - which task/comment/approval to prioritize +- Move long-lived setup text out of recurring per-run prompts where possible. +- Add UI guidance and warnings when `promptTemplate` contains high-churn or large inline content. + +### Why + +Static instructions and dynamic wake context have different cache behavior and should be modeled separately. 
+ +### Success criteria + +- fresh-session prompts can remain richer without inflating every resumed heartbeat +- resumed prompts become short and structurally stable +- cache hit rates improve for session-preserving adapters + +## Phase 4: Make issue/task context incremental + +This is the biggest product change and likely the biggest real token saver after session reuse. + +### Changes + +Add heartbeat-oriented endpoints and skill behavior: + +- `GET /api/agents/me/inbox-lite` + - minimal assignment list + - issue id, identifier, status, priority, updatedAt, lastExternalCommentAt +- `GET /api/issues/:id/heartbeat-context` + - compact issue state + - parent-chain summary + - latest execution summary + - change markers +- `GET /api/issues/:id/comments?after=<cursor>` or `?since=<timestamp>` + - return only new comments +- optional `GET /api/issues/:id/context-digest` + - server-generated compact summary for heartbeat use + +Update the `paperclip` skill so the default pattern becomes: + +1. fetch compact inbox +2. fetch compact task context +3. fetch only new comments unless this is the first read, a mention-triggered wake, or a cache miss +4. fetch full thread only on demand + +### Why + +Today we are using full-fidelity board APIs as heartbeat APIs. That is convenient but token-inefficient. + +### Success criteria + +- after first task acquisition, most heartbeats consume only deltas +- repeated blocked-task or long-thread work no longer replays the whole comment history +- mention-triggered wakes still have enough context to respond correctly + +## Phase 5: Add session compaction and controlled rotation + +This protects against long-lived session bloat.
+ +### Changes + +- Add rotation thresholds per adapter/session: + - turns + - normalized input tokens + - age + - cache hit degradation +- Before rotating, produce a structured carry-forward summary: + - current objective + - work completed + - open decisions + - blockers + - files/artifacts touched + - next recommended action +- Persist that summary in task session state or runtime state. +- Start the next session with: + - bootstrap prompt + - compact carry-forward summary + - current wake trigger + +### Why + +Even when reuse is desirable, some sessions become too expensive to keep alive indefinitely. + +### Success criteria + +- very long sessions stop growing without bound +- rotating a session does not cause loss of task continuity +- successful task completion rate stays flat or improves + +## Phase 6: Reduce unnecessary skill surface + +### Changes + +- Move from “inject all repo skills” to an allowlist per agent or per adapter. +- Default local runtime skill set should likely be: + - `paperclip` +- Add opt-in skills for specialized agents: + - `paperclip-create-agent` + - `para-memory-files` + - `create-agent-adapter` +- Expose active skill set in agent config and run metadata. + +### Why + +Most agents do not need adapter-authoring or memory-system skills on every run. + +### Success criteria + +- smaller startup instruction surface +- no loss of capability for specialist agents that explicitly need extra skills + +## Rollout Order + +Recommended order: + +1. telemetry normalization +2. timer-wake session reuse +3. bootstrap prompt implementation +4. heartbeat delta APIs + `paperclip` skill rewrite +5. session compaction/rotation +6. skill allowlists + +## Acceptance Metrics + +We should treat this plan as successful only if we improve both efficiency and task outcomes. 
+ +Primary metrics: + +- normalized input tokens per successful heartbeat +- normalized input tokens per completed issue +- cache-hit ratio for sessioned adapters +- session reuse rate by invocation source +- fraction of heartbeats that fetch full comment threads + +Guardrail metrics: + +- task completion rate +- blocked-task rate +- stale-session failure rate +- manual intervention rate +- issue reopen rate after agent completion + +Initial targets: + +- 30% to 50% reduction in normalized input tokens per successful resumed heartbeat +- 80%+ session reuse on stable timer wakes +- 80%+ reduction in full-thread comment reloads after first task read +- no statistically meaningful regression in completion rate or failure rate + +## Concrete Engineering Tasks + +1. Add normalized usage fields and migration support for run analytics. +2. Patch sessioned adapter accounting to compute deltas from prior session totals. +3. Change `shouldResetTaskSessionForWake(...)` so timer wakes do not reset by default. +4. Implement `bootstrapPromptTemplate` end-to-end in adapter execution. +5. Add compact heartbeat context and incremental comment APIs. +6. Rewrite `skills/paperclip/SKILL.md` around delta-fetch behavior. +7. Add session rotation with carry-forward summaries. +8. Replace global skill injection with explicit allowlists. + +## Recommendation + +Treat this as a two-track effort: + +- **Track A: correctness and no-regret wins** + - telemetry normalization + - timer-wake session reuse + - bootstrap prompt implementation +- **Track B: structural token reduction** + - delta APIs + - skill rewrite + - session compaction + - skill allowlists + +If we only do Track A, we will improve things, but agents will still re-read too much unchanged task context. + +If we only do Track B without fixing telemetry first, we will not be able to prove the gains cleanly. 
diff --git a/doc/plans/2026-03-13-features.md b/doc/plans/2026-03-13-features.md new file mode 100644 index 00000000..80c60a87 --- /dev/null +++ b/doc/plans/2026-03-13-features.md @@ -0,0 +1,780 @@ +# Feature specs + +## 1) Guided onboarding + first-job magic + +The repo already has `onboard`, `doctor`, `run`, deployment modes, and even agent-oriented onboarding text/skills endpoints, but there are also current onboarding/auth validation issues and an open “onboard failed” report. That means this is not just polish; it is product-critical. ([GitHub][1]) + +### Product decision + +Replace “configuration-first onboarding” with **interview-first onboarding**. + +### What we want + +- Ask 3–4 questions up front, not 20 settings. +- Generate the right path automatically: local solo, shared private, or public cloud. +- Detect what agent/runtime environment already exists. +- Make it normal to have Claude/OpenClaw/Codex help complete setup. +- End onboarding with a **real first task**, not a blank dashboard. + +### What we do not want + +- Provider jargon before value. +- “Go find an API key” as the default first instruction. +- A successful install that still leaves users unsure what to do next. + +### Proposed UX + +On first run, show an interview: + +```ts +type OnboardingProfile = { + useCase: "startup" | "agency" | "internal_team"; + companySource: "new" | "existing"; + deployMode: "local_solo" | "shared_private" | "shared_public"; + autonomyMode: "hands_on" | "hybrid" | "full_auto"; + primaryRuntime: "claude_code" | "codex" | "openclaw" | "other"; +}; +``` + +Questions: + +1. What are you building? +2. Is this a new company, an existing company, or a service/agency team? +3. Are you working solo on one machine, sharing privately with a team, or deploying publicly? +4. Do you want full auto, hybrid, or tight manual control? 
+ +Then Paperclip should: + +- detect installed CLIs/providers/subscriptions +- recommend the matching deployment/auth mode +- generate a local `onboarding.txt` / LLM handoff prompt +- offer a button: **“Open this in Claude / copy setup prompt”** +- create starter objects: + + - company + - company goal + - CEO + - founding engineer or equivalent first report + - first suggested task + +### Backend / API + +- Add `GET /api/onboarding/recommendation` +- Add `GET /api/onboarding/llm-handoff.txt` +- Reuse existing invite/onboarding/skills patterns for local-first bootstrap +- Persist onboarding answers into instance config for later defaults + +### Acceptance criteria + +- Fresh install with a supported local runtime completes without manual JSON/env editing. +- User sees first live agent action before leaving onboarding. +- A blank dashboard is no longer the default post-install state. +- If a required dependency is missing, the error is prescriptive and fixable from the UI/CLI. + +### Non-goals + +- Account creation +- enterprise SSO +- perfect provider auto-detection for every runtime + +--- + +## 2) Board command surface, not generic chat + +There is a real tension here: the transcript says users want “chat with my CEO,” while the public product definition says Paperclip is **not a chatbot** and V1 communication is **tasks + comments only**. At the same time, the repo is already exploring plugin infrastructure and even a chat plugin via plugin SSE streaming. The clean resolution is: **make the core surface conversational, but keep the data model task/thread-centric; reserve full chat as an optional plugin**. ([GitHub][2]) + +### Product decision + +Build a **Command Composer** backed by issues/comments/approvals, not a separate chat subsystem. + +### What we want + +- “Talk to the CEO” feeling for the user. +- Every conversation ends up attached to a real company object. +- Strategy discussion can produce issues, artifacts, and approvals. 
+ +### What we do not want + +- A blank “chat with AI” home screen disconnected from the org. +- Yet another agent-chat product. + +### Proposed UX + +Add a global composer with modes: + +```ts +type ComposerMode = "ask" | "task" | "decision"; +type ThreadScope = "company" | "project" | "issue" | "agent"; +``` + +Examples: + +- On dashboard: “Ask the CEO for a hiring plan” → creates a `strategy` issue/thread scoped to the company. +- On agent page: “Tell the designer to make this cleaner” → appends an instruction comment to an issue or spawns a new delegated task. +- On approval page: “Why are you asking to hire?” → appends a board comment to the approval context. + +Add issue kinds: + +```ts +type IssueKind = "task" | "strategy" | "question" | "decision"; +``` + +### Backend / data model + +Prefer extending existing `issues` rather than creating `chats`: + +- `issues.kind` +- `issues.scope` +- optional `issues.target_agent_id` +- comment metadata: `comment.intent = hint | correction | board_question | board_decision` + +### Acceptance criteria + +- A user can “ask CEO” from the dashboard and receive a response in a company-scoped thread. +- From that thread, the user can create/approve tasks with one click. +- No separate chat database is required for v1 of this feature. + +### Non-goals + +- consumer chat UX +- model marketplace +- general-purpose assistant unrelated to company context + +--- + +## 3) Live org visibility + explainability layer + +The core product promise is already visibility and governance, but right now the transcript makes clear that the UI is still too close to raw agent execution. The repo already has org charts, activity, heartbeat runs, costs, and agent detail surfaces; the missing piece is the explanatory layer above them. ([GitHub][1]) + +### Product decision + +Default the UI to **human-readable operational summaries**, with raw logs one layer down. 
+ +### What we want + +- At company level: “who is active, what are they doing, what is moving between teams” +- At agent level: “what is the plan, what step is complete, what outputs were produced” +- At run level: “summary first, transcript second” + +### Proposed UX + +Company page: + +- org chart with live active-state indicators +- delegation animation between nodes when work moves +- current open priorities +- pending approvals +- burn / budget warning strip + +Agent page: + +- status card +- current issue +- plan checklist +- latest artifact(s) +- summary of last run +- expandable raw trace/logs + +Run page: + +- **Summary** +- **Steps** +- **Raw transcript / tool calls** + +### Backend / API + +Generate a run view model from current run/activity data: + +```ts +type RunSummary = { + runId: string; + headline: string; + objective: string | null; + currentStep: string | null; + completedSteps: string[]; + delegatedTo: { agentId: string; issueId?: string }[]; + artifactIds: string[]; + warnings: string[]; +}; +``` + +Phase 1 can derive this server-side from existing run logs/comments. Persist only if needed later. + +### Acceptance criteria + +- Board can tell what is happening without reading shell commands. +- Raw logs are still accessible, but not the default surface. +- First task / first hire / first completion moments are visibly celebrated. + +### Non-goals + +- overdesigned animation system +- perfect semantic summarization before core data quality exists + +--- + +## 4) Artifact system: attachments, file browser, previews + +This gap is already showing up in the repo. Storage is present, attachment endpoints exist, but current issues show that attachments are still effectively image-centric and comment attachment rendering is incomplete. At the same time, your transcript wants plans, docs, files, and generated web pages surfaced cleanly. 
([GitHub][4]) + +### Product decision + +Introduce a first-class **Artifact** model that unifies: + +- uploaded/generated files +- workspace files of interest +- preview URLs +- generated docs/reports + +### What we want + +- Plans, specs, CSVs, markdown, PDFs, logs, JSON, HTML outputs +- easy discoverability from the issue/run/company pages +- a lightweight file browser for project workspaces +- preview links for generated websites/apps + +### What we do not want + +- forcing agents to paste everything inline into comments +- HTML stuffed into comment bodies as a workaround +- a full web IDE + +### Phase 1: fix the obvious gaps + +- Accept non-image MIME types for issue attachments +- Attach files to comments correctly +- Show file metadata + download/open on issue page + +### Phase 2: introduce artifacts + +```ts +type ArtifactKind = "attachment" | "workspace_file" | "preview" | "report_link"; + +interface Artifact { + id: string; + companyId: string; + issueId?: string; + runId?: string; + agentId?: string; + kind: ArtifactKind; + title: string; + mimeType?: string; + filename?: string; + sizeBytes?: number; + storageKind: "local_disk" | "s3" | "external_url"; + contentPath?: string; + previewUrl?: string; + metadata: Record<string, unknown>; +} +``` + +### UX + +Issue page gets a **Deliverables** section: + +- Files +- Reports +- Preview links +- Latest generated artifact highlighted at top + +Project page gets a **Files** tab: + +- folder tree +- recent changes +- “Open produced files” shortcut + +### Preview handling + +For HTML/static outputs: + +- local deploy → open local preview URL +- shared/public deploy → host via configured preview service or static storage +- preview URL is registered back onto the issue as an artifact + +### Acceptance criteria + +- Agents can attach `.md`, `.txt`, `.json`, `.csv`, `.pdf`, and `.html`. +- Users can open/download them from the issue page. +- A generated static site can be opened from an issue without hunting through the filesystem.
+ +### Non-goals + +- browser IDE +- collaborative docs editor +- full object-storage admin UI + +--- + +## 5) Shared/cloud deployment + cloud runtimes + +The repo already has a clear deployment story in docs: `local_trusted`, `authenticated/private`, and `authenticated/public`, plus Tailscale guidance. The roadmap explicitly calls out cloud agents like Cursor / e2b. That means the next step is not inventing a deployment model; it is making the shared/cloud path canonical and production-usable. ([GitHub][5]) + +### Product decision + +Make **shared/private deploy** and **public/cloud deploy** first-class supported modes, and add **remote runtime drivers** for cloud-executed agents. + +### What we want + +- one instance a team can actually share +- local-first path that upgrades to private/public without a mental model change +- remote agent execution for non-local runtimes + +### Proposed architecture + +Separate **control plane** from **execution runtime** more explicitly: + +```ts +type RuntimeDriver = "local_process" | "remote_sandbox" | "webhook"; + +interface ExecutionHandle { + externalRunId: string; + status: "queued" | "running" | "completed" | "failed" | "cancelled"; + previewUrl?: string; + logsUrl?: string; +} +``` + +First remote driver: `remote_sandbox` for e2b-style execution. + +### Deliverables + +- canonical deploy recipes: + + - local solo + - shared private (Tailscale/private auth) + - public cloud (managed Postgres + object storage + public URL) + +- runtime health page +- adapter/runtime capability matrix +- one official reference deployment path + +### UX + +New “Deployment” settings page: + +- instance mode +- auth/exposure +- storage/database status +- runtime drivers configured +- health and reachability checks + +### Acceptance criteria + +- Two humans can log into one authenticated/private instance and use it concurrently. +- A public deployment can run agents via at least one remote runtime. 
+- `doctor` catches missing public/private config and gives concrete fixes. + +### Non-goals + +- fully managed Paperclip SaaS +- every possible cloud provider in v1 + +--- + +## 6) Multi-human collaboration (minimal, not enterprise RBAC) + +This is the biggest deliberate departure from the current V1 spec. Publicly, V1 still says “single human board operator” and puts role-based human granularity out of scope. But the transcript is right that shared use is necessary if Paperclip is going to be real for teams. The key is to do a **minimal collaboration model**, not a giant permission system. ([GitHub][2]) + +### Product decision + +Ship **coarse multi-user company memberships**, not fine-grained enterprise RBAC. + +### Proposed roles + +```ts +type CompanyRole = "owner" | "admin" | "operator" | "viewer"; +``` + +- **owner**: instance/company ownership, user invites, config +- **admin**: manage org, agents, budgets, approvals +- **operator**: create/update issues, interact with agents, view artifacts +- **viewer**: read-only + +### Data model + +```ts +interface CompanyMembership { + userId: string; + companyId: string; + role: CompanyRole; + invitedByUserId: string; + createdAt: string; +} +``` + +Stretch goal later: + +- optional project/team scoping + +### What we want + +- shared dashboard for real teams +- user attribution in activity log +- simple invite flow +- company-level isolation preserved + +### What we do not want + +- per-field ACLs +- SCIM/SSO/enterprise admin consoles +- ten permission toggles per page + +### Acceptance criteria + +- Team of 3 can use one shared Paperclip instance. +- Every user action is attributed correctly in activity. +- Company membership boundaries are enforced. +- Viewer cannot mutate; operator/admin can. + +### Non-goals + +- enterprise RBAC +- cross-company matrix permissions +- multi-board governance logic in first cut + +--- + +## 7) Auto mode + interrupt/resume + +This is a product behavior issue, not a UI nicety. 
If agents cannot keep working or accept course correction without restarting, the autonomy model feels fake. + +### Product decision + +Make auto mode and mid-run interruption first-class runtime semantics. + +### What we want + +- Auto mode that continues until blocked by approvals, budgets, or explicit pause. +- Mid-run “you missed this” correction without losing session continuity. +- Clear state when an agent is waiting, blocked, or paused. + +### Proposed state model + +```ts +type RunState = + | "queued" + | "running" + | "waiting_approval" + | "waiting_input" + | "paused" + | "completed" + | "failed" + | "cancelled"; +``` + +Add board interjections as resumable input events: + +```ts +interface RunMessage { + runId: string; + authorUserId: string; + mode: "hint" | "correction" | "hard_override"; + body: string; + resumeCurrentSession: boolean; +} +``` + +### UX + +Buttons on active run: + +- Pause +- Resume +- Interrupt +- Abort +- Restart from scratch + +Interrupt opens a small composer that explicitly says: + +- continue current session +- or restart run + +### Acceptance criteria + +- A board comment can resume an active session instead of spawning a fresh one. +- Session ID remains stable for “continue” path. +- UI clearly distinguishes blocked vs. waiting vs. paused. + +### Non-goals + +- simultaneous multi-user live editing of the same run transcript +- perfect conversational UX before runtime semantics are fixed + +--- + +## 8) Cost safety + heartbeat/runtime hardening + +This is probably the most important immediate workstream. The transcript says token burn is the highest pain, and the repo currently has active issues around budget enforcement evidence, onboarding/auth validation, and circuit-breaker style waste prevention. Public docs already promise hard budgets, and the issue tracker is pointing at the missing operational protections. ([GitHub][6]) + +### Product decision + +Treat this as a **P0 runtime contract**, not a nice-to-have. 
+ +### Part A: deterministic wake gating + +Do cheap, explicit work detection before invoking an LLM. + +```ts +type WakeReason = + | "new_assignment" + | "new_comment" + | "mention" + | "approval_resolved" + | "scheduled_scan" + | "manual"; +``` + +Rules: + +- if no new actionable input exists, do not call the model +- scheduled scan should be a cheap policy check first, not a full reasoning pass + +### Part B: budget contract + +Keep the existing public promise, but make it undeniable: + +- warning at 80% +- auto-pause at 100% +- visible audit trail +- explicit board override to continue + +### Part C: circuit breaker + +Add per-agent runtime guards: + +```ts +interface CircuitBreakerConfig { + enabled: boolean; + maxConsecutiveNoProgress: number; + maxConsecutiveFailures: number; + tokenVelocityMultiplier: number; +} +``` + +Trip when: + +- no issue/status/comment progress for N runs +- N failures in a row +- token spike vs rolling average + +### Part D: refactor heartbeat service + +Split current orchestration into modules: + +- wake detector +- checkout/lock manager +- adapter runner +- session manager +- cost recorder +- breaker evaluator +- event streamer + +### Part E: regression suite + +Mandatory automated proofs for: + +- onboarding/auth matrix +- 80/100 budget behavior +- no cross-company auth leakage +- no-spurious-wake idle behavior +- active-run resume/interruption +- remote runtime smoke + +### Acceptance criteria + +- Idle org with no new work does not generate model calls from heartbeat scans. +- 80% shows warning only. +- 100% pauses the agent and blocks continued execution until override. +- Circuit breaker pause is visible in audit/activity. +- Runtime modules have explicit contracts and are testable independently. 
+ +### Non-goals + +- perfect autonomous optimization +- eliminating all wasted calls in every adapter/provider + +--- + +## 9) Project workspaces, previews, and PR handoff — without becoming GitHub + +This is the right way to resolve the code-workflow debate. The repo already has worktree-local instances, project `workspaceStrategy.provisionCommand`, and an RFC for adapter-level git worktree isolation. That is the correct architectural direction: **project execution policies and workspace isolation**, not built-in PR review. ([GitHub][7]) + +### Product decision + +Paperclip should manage the **issue → workspace → preview/PR → review handoff** lifecycle, but leave diffs/review/merge to external tools. + +### Proposed config + +Prefer repo-local project config: + +```yaml +# .paperclip/project.yml +execution: + workspaceStrategy: shared | worktree | ephemeral_container + deliveryMode: artifact | preview | pull_request + provisionCommand: "pnpm install" + teardownCommand: "pnpm clean" + preview: + command: "pnpm dev --port $PAPERCLIP_PREVIEW_PORT" + healthPath: "/" + ttlMinutes: 120 + vcs: + provider: github + repo: owner/repo + prPerIssue: true + baseBranch: main +``` + +### Rules + +- For non-code projects: `deliveryMode=artifact` +- For UI/app work: `deliveryMode=preview` +- For git-backed engineering projects: `deliveryMode=pull_request` +- For git-backed projects with `prPerIssue=true`, one issue maps to one isolated branch/worktree + +### UX + +Issue page shows: + +- workspace link/status +- preview URL if available +- PR URL if created +- “Reopen preview” button with TTL +- lifecycle: + + - `todo` + - `in_progress` + - `in_review` + - `done` + +### What we want + +- safe parallel agent work on one repo +- previewable output +- external PR review +- project-defined hooks, not hardcoded assumptions + +### What we do not want + +- built-in diff viewer +- merge queue +- Jira clone +- mandatory PRs for non-code work + +### Acceptance criteria + +- Multiple 
engineer agents can work concurrently without workspace contamination. +- When a project is in PR mode, the issue contains branch/worktree/preview/PR metadata. +- Preview can be reopened on demand until TTL expires. + +### Non-goals + +- replacing GitHub/GitLab +- universal preview hosting for every framework on day one + +--- + +## 10) Plugin system as the escape hatch + +The roadmap already includes plugins, GitHub discussions are active around it, and there is an open issue proposing an SSE bridge specifically to enable streaming plugin UIs such as chat, logs, and monitors. This is exactly the right place for optional surfaces. ([GitHub][1]) + +### Product decision + +Keep the control-plane core thin; put optional high-variance experiences into plugins. + +### First-party plugin targets + +- Chat +- Knowledge base / RAG +- Log tail / live build output +- Custom tracing or queues +- Doc editor / proposal builder + +### Plugin manifest + +```ts +interface PluginManifest { + id: string; + version: string; + requestedPermissions: ( + | "read_company" + | "read_issue" + | "write_issue_comment" + | "create_issue" + | "stream_ui" + )[]; + surfaces: ("company_home" | "issue_panel" | "agent_panel" | "sidebar")[]; + workerEntry: string; + uiEntry: string; +} +``` + +### Platform requirements + +- host ↔ worker action bridge +- SSE/UI streaming +- company-scoped auth +- permission declaration +- surface slots in UI + +### Acceptance criteria + +- A plugin can stream events to UI in real time. +- A chat plugin can converse without requiring chat to become the core Paperclip product. +- Plugin permissions are company-scoped and auditable. + +### Non-goals + +- plugins mutating core schema directly +- arbitrary privileged code execution without explicit permissions + +--- + +## Priority order I would use + +Given the repo state and the transcript, I would sequence it like this: + +**P0** + +1. Cost safety + heartbeat hardening +2. Guided onboarding + first-job magic +3. 
Shared/cloud deployment foundation +4. Artifact phase 1: non-image attachments + deliverables surfacing + +**P1** + +5. Board command surface +6. Visibility/explainability layer +7. Auto mode + interrupt/resume +8. Minimal multi-user collaboration + +**P2** + +9. Project workspace / preview / PR lifecycle +10. Plugin system + optional chat plugin +11. Template/preset expansion for startup vs agency vs internal-team onboarding + +Why this order: the current repo is already getting pressure on onboarding failures, auth/onboarding validation, budget enforcement, and wasted token burn. If those are shaky, everything else feels impressive but unsafe. ([GitHub][3]) + +## Bottom line + +The best synthesis is: + +- **Keep** Paperclip as the board-level control plane. +- **Do not** make chat, code review, or workflow-building the core identity. +- **Do** make the product feel conversational, visible, output-oriented, and shared. +- **Do** make coding workflows an integration surface via workspaces/previews/PR links. +- **Use plugins** for richer edges like chat and knowledge. + +That keeps the repo’s current product direction intact while solving almost every pain surfaced in the transcript. + +### Key references + +- README / positioning / roadmap / product boundaries. ([GitHub][1]) +- Product definition. ([GitHub][8]) +- V1 implementation spec and explicit non-goals. ([GitHub][2]) +- Core concepts and architecture. ([GitHub][9]) +- Deployment modes / Tailscale / local-to-cloud path. ([GitHub][5]) +- Developing guide: worktree-local instances, provision hooks, onboarding endpoints. ([GitHub][7]) +- Current issue pressure: onboarding failure, auth/onboarding validation, budget enforcement, circuit breaker, attachment gaps, plugin chat. 
([GitHub][3]) + +[1]: https://github.com/paperclipai/paperclip "https://github.com/paperclipai/paperclip" +[2]: https://github.com/paperclipai/paperclip/blob/master/doc/SPEC-implementation.md "https://github.com/paperclipai/paperclip/blob/master/doc/SPEC-implementation.md" +[3]: https://github.com/paperclipai/paperclip/issues/704 "https://github.com/paperclipai/paperclip/issues/704" +[4]: https://github.com/paperclipai/paperclip/blob/master/docs/deploy/tailscale-private-access.md "https://github.com/paperclipai/paperclip/blob/master/docs/deploy/tailscale-private-access.md" +[5]: https://github.com/paperclipai/paperclip/blob/master/docs/deploy/deployment-modes.md "https://github.com/paperclipai/paperclip/blob/master/docs/deploy/deployment-modes.md" +[6]: https://github.com/paperclipai/paperclip/issues/692 "https://github.com/paperclipai/paperclip/issues/692" +[7]: https://github.com/paperclipai/paperclip/blob/master/doc/DEVELOPING.md "https://github.com/paperclipai/paperclip/blob/master/doc/DEVELOPING.md" +[8]: https://github.com/paperclipai/paperclip/blob/master/doc/PRODUCT.md "https://github.com/paperclipai/paperclip/blob/master/doc/PRODUCT.md" +[9]: https://github.com/paperclipai/paperclip/blob/master/docs/start/core-concepts.md "https://github.com/paperclipai/paperclip/blob/master/docs/start/core-concepts.md" diff --git a/doc/plans/2026-03-13-workspace-product-model-and-work-product.md b/doc/plans/2026-03-13-workspace-product-model-and-work-product.md new file mode 100644 index 00000000..ae5b8e79 --- /dev/null +++ b/doc/plans/2026-03-13-workspace-product-model-and-work-product.md @@ -0,0 +1,1126 @@ +# Workspace Product Model, Work Product, and PR Flow + +## Context + +Paperclip needs to support two very different but equally valid ways of working: + +- a solo developer working directly on `master`, or in a folder that is not even a git repo +- a larger engineering workflow with isolated branches, previews, pull requests, and cleanup automation + +Today, 
Paperclip already has the beginnings of this model: + +- `projects` can carry execution workspace policy +- `project_workspaces` already exist as a durable project-scoped object +- issues can carry execution workspace settings +- runtime services can be attached to a workspace or issue + +What is missing is a clear product model and UI that make these capabilities understandable and operable. + +The main product risk is overloading one concept to do too much: + +- making subissues do the job of branches or PRs +- making projects too infrastructure-heavy +- making workspaces so hidden that users cannot form a mental model +- making Paperclip feel like a code review tool instead of a control plane + +## Goals + +1. Keep `project` lightweight enough to remain a planning container. +2. Make workspace behavior understandable for both git and non-git projects. +3. Support three real workflows without forcing one: + - shared workspace / direct-edit workflows + - isolated issue workspace workflows + - long-lived branch or operator integration workflows +4. Provide a first-class place to see the outputs of work: + - previews + - PRs + - branches + - commits + - documents and artifacts +5. Keep the main navigation and task board simple. +6. Seamlessly upgrade existing Paperclip users to the new model without forcing disruptive reconfiguration. +7. Support cloud-hosted Paperclip deployments where execution happens in remote or adapter-managed environments rather than local workers. + +## Non-Goals + +- Turning Paperclip into a full code review product +- Requiring every issue to have its own branch or PR +- Requiring every project to configure code/workspace automation +- Making workspaces a top-level global navigation primitive in V1 +- Requiring a local filesystem path or local git checkout to use workspace-aware execution + +## Core Product Decisions + +### 1. 
Project stays the planning object + +A `project` remains the thing that groups work around a deliverable or initiative. + +It may have: + +- no code at all +- one default codebase/workspace +- several codebases/workspaces + +Projects are not required to become heavyweight. + +### 2. Project workspace is a first-class object, but scoped under project + +A `project workspace` is the durable codebase or root environment for a project. + +Examples: + +- a local folder on disk +- a git repo checkout +- a monorepo package root +- a non-git design/doc folder +- a remote adapter-managed codebase reference + +This is the stable anchor that operators configure once. + +It should not be a top-level sidebar item in the main app. It should live under the project experience. + +### 3. Execution workspace is a first-class runtime object + +An `execution workspace` is where a specific run or issue actually executes. + +Examples: + +- the shared project workspace itself +- an isolated git worktree +- a long-lived operator branch checkout +- an adapter-managed remote sandbox +- a cloud agent provider's isolated branch/session environment + +This object must be recorded explicitly so that Paperclip can: + +- show where work happened +- attach previews and runtime services +- link PRs and branches +- decide cleanup behavior +- support reuse across multiple related issues + +### 4. PRs are work product, not the core issue model + +A PR is an output of work, not the planning unit. + +Paperclip should treat PRs as a type of work product linked back to: + +- the issue +- the execution workspace +- optionally the project workspace + +Git-specific automation should live under workspace policy, not under the core issue abstraction. + +### 5. Existing users must upgrade automatically + +Paperclip already has users and existing project/task data. Any new model must preserve continuity. 
+ +The product should default existing installs into a sensible compatibility mode: + +- existing projects without workspace configuration continue to work unchanged +- existing `project_workspaces` become the durable `project workspace` objects +- existing project execution workspace policy is mapped forward rather than discarded +- issues without explicit workspace fields continue to inherit current behavior + +This migration should feel additive, not like a mandatory re-onboarding flow. + +### 6. Cloud-hosted Paperclip must be a first-class deployment mode + +Paperclip cannot assume that it is running on the same machine as the code. + +In cloud deployments, Paperclip may: + +- run on Vercel or another serverless host +- have no long-lived local worker process +- delegate execution to a remote coding agent or provider-managed sandbox +- receive back a branch, PR, preview URL, or artifact from that remote environment + +The model therefore must be portable: + +- `project workspace` may be remote-managed, not local +- `execution workspace` may have no local `cwd` +- `runtime services` may be tracked by provider reference and URL rather than a host process +- work product harvesting must handle externally owned previews and PRs + +### 7. Subissues remain planning and ownership structure + +Subissues are for decomposition and parallel ownership. + +They are not the same thing as: + +- a branch +- a worktree +- a PR +- a preview + +They may correlate with those things, but they should not be overloaded to mean them. 
+ +## Terminology + +Use these terms consistently in product copy: + +- `Project`: planning container +- `Project workspace`: durable configured codebase/root +- `Execution workspace`: actual runtime workspace used for issue execution +- `Isolated issue workspace`: user-facing term for an issue-specific derived workspace +- `Work product`: previews, PRs, branches, commits, artifacts, docs +- `Runtime service`: a process or service Paperclip owns or tracks for a workspace + +Use these terms consistently in migration and deployment messaging: + +- `Compatible mode`: existing behavior preserved without new workspace automation +- `Adapter-managed workspace`: workspace realized by a remote or cloud execution provider + +Avoid teaching users that "workspace" always means "git worktree on my machine". + +## Product Object Model + +## 1. Project + +Existing object. No fundamental change in role. + +### Required behavior + +- can exist without code/workspace configuration +- can have zero or more project workspaces +- can define execution defaults that new issues inherit + +### Proposed fields + +- `id` +- `companyId` +- `name` +- `description` +- `status` +- `goalIds` +- `leadAgentId` +- `targetDate` +- `executionWorkspacePolicy` +- `workspaces[]` +- `primaryWorkspace` + +## 2. Project Workspace + +Durable, configured, project-scoped codebase/root object. + +This should evolve from the current `project_workspaces` table into a more explicit product object. + +### Motivation + +This separates: + +- "what codebase/root does this project use?" + +from: + +- "what temporary execution environment did this issue run in?" + +That keeps the model simple for solo users while still supporting advanced automation. +It also lets cloud-hosted Paperclip deployments point at codebases and remotes without pretending the Paperclip host has direct filesystem access. 
+ +### Proposed fields + +- `id` +- `companyId` +- `projectId` +- `name` +- `sourceType` + - `local_path` + - `git_repo` + - `remote_managed` + - `non_git_path` +- `cwd` +- `repoUrl` +- `defaultRef` +- `isPrimary` +- `visibility` + - `default` + - `advanced` +- `setupCommand` +- `cleanupCommand` +- `metadata` +- `createdAt` +- `updatedAt` + +### Notes + +- `sourceType=non_git_path` is important so non-git projects are first-class. +- `setupCommand` and `cleanupCommand` should be allowed here for workspace-root bootstrap, even when isolated execution is not used. +- For a monorepo, multiple project workspaces may point at different roots or packages under one repo. +- `sourceType=remote_managed` is important for cloud deployments where the durable codebase is defined by provider/repo metadata rather than a local checkout path. + +## 3. Project Execution Workspace Policy + +Project-level defaults for how issues execute. + +This is the main operator-facing configuration surface. + +### Motivation + +This lets Paperclip support: + +- direct editing in a shared workspace +- isolated workspaces for issue parallelism +- long-lived integration branch workflows +- remote cloud-agent execution that returns a branch or PR + +without forcing every issue or agent to expose low-level runtime configuration. 
+ +### Proposed fields + +- `enabled: boolean` +- `defaultMode` + - `shared_workspace` + - `isolated_workspace` + - `operator_branch` + - `adapter_default` +- `allowIssueOverride: boolean` +- `defaultProjectWorkspaceId: uuid | null` +- `workspaceStrategy` + - `type` + - `project_primary` + - `git_worktree` + - `adapter_managed` + - `baseRef` + - `branchTemplate` + - `worktreeParentDir` + - `provisionCommand` + - `teardownCommand` +- `branchPolicy` + - `namingTemplate` + - `allowReuseExisting` + - `preferredOperatorBranch` +- `pullRequestPolicy` + - `mode` + - `disabled` + - `manual` + - `agent_may_open_draft` + - `approval_required_to_open` + - `approval_required_to_mark_ready` + - `baseBranch` + - `titleTemplate` + - `bodyTemplate` +- `runtimePolicy` + - `allowWorkspaceServices` + - `defaultServicesProfile` + - `autoHarvestOwnedUrls` +- `cleanupPolicy` + - `mode` + - `manual` + - `when_issue_terminal` + - `when_pr_closed` + - `retention_window` + - `retentionHours` + - `keepWhilePreviewHealthy` + - `keepWhileOpenPrExists` + +## 4. Issue Workspace Binding + +Issue-level selection of execution behavior. + +This should remain lightweight in the normal case and only surface richer controls when relevant. + +### Motivation + +Not every issue in a code project should create a new derived workspace. 
+ +Examples: + +- a tiny fix can run in the shared workspace +- three related issues may intentionally share one integration branch +- a solo operator may be working directly on `master` + +### Proposed fields on `issues` + +- `projectWorkspaceId: uuid | null` +- `executionWorkspacePreference` + - `inherit` + - `shared_workspace` + - `isolated_workspace` + - `operator_branch` + - `reuse_existing` +- `preferredExecutionWorkspaceId: uuid | null` +- `executionWorkspaceSettings` + - keep advanced per-issue override fields here + +### Rules + +- if the project has no workspace automation, these fields may all be null +- if the project has one primary workspace, issue creation should default to it silently +- `reuse_existing` is advanced-only and should target active execution workspaces, not the whole workspace universe +- existing issues without these fields should behave as `inherit` during migration + +## 5. Execution Workspace + +A durable record for a shared or derived runtime workspace. + +This is the missing object that makes cleanup, previews, PRs, and branch reuse tractable. 
+ +### Motivation + +Without an explicit `execution workspace` record, Paperclip has nowhere stable to attach: + +- derived branch/worktree identity +- active preview ownership +- PR linkage +- cleanup state +- "reuse this existing integration branch" behavior +- remote provider session identity + +### Proposed new object + +`execution_workspaces` + +### Proposed fields + +- `id` +- `companyId` +- `projectId` +- `projectWorkspaceId` +- `sourceIssueId` +- `mode` + - `shared_workspace` + - `isolated_workspace` + - `operator_branch` + - `adapter_managed` +- `strategyType` + - `project_primary` + - `git_worktree` + - `adapter_managed` +- `name` +- `status` + - `active` + - `idle` + - `in_review` + - `archived` + - `cleanup_failed` +- `cwd` +- `repoUrl` +- `baseRef` +- `branchName` +- `providerRef` +- `providerType` + - `local_fs` + - `git_worktree` + - `adapter_managed` + - `cloud_sandbox` +- `derivedFromExecutionWorkspaceId` +- `lastUsedAt` +- `openedAt` +- `closedAt` +- `cleanupEligibleAt` +- `cleanupReason` +- `metadata` +- `createdAt` +- `updatedAt` + +### Notes + +- `sourceIssueId` is the issue that originally caused the workspace to be created, not necessarily the only issue linked to it later. +- multiple issues may link to the same execution workspace in a long-lived branch workflow. +- `cwd` may be null for remote execution workspaces; provider identity and work product links still make the object useful. + +## 6. Issue-to-Execution Workspace Link + +An issue may need to link to one or more execution workspaces over time. 
+ +Examples: + +- an issue begins in a shared workspace and later moves to an isolated one +- a failed attempt is archived and a new workspace is created +- several issues intentionally share one operator branch workspace + +### Proposed object + +`issue_execution_workspaces` + +### Proposed fields + +- `issueId` +- `executionWorkspaceId` +- `relationType` + - `current` + - `historical` + - `preferred` +- `createdAt` +- `updatedAt` + +### UI simplification + +Most issues should only show one current workspace in the main UI. Historical links belong in advanced/history views. + +## 7. Work Product + +User-facing umbrella concept for outputs of work. + +### Motivation + +Paperclip needs a single place to show: + +- "here is the preview" +- "here is the PR" +- "here is the branch" +- "here is the commit" +- "here is the artifact/report/doc" + +without turning issues into a raw dump of adapter details. + +### Proposed new object + +`issue_work_products` + +### Proposed fields + +- `id` +- `companyId` +- `projectId` +- `issueId` +- `executionWorkspaceId` +- `runtimeServiceId` +- `type` + - `preview_url` + - `runtime_service` + - `pull_request` + - `branch` + - `commit` + - `artifact` + - `document` +- `provider` + - `paperclip` + - `github` + - `gitlab` + - `vercel` + - `netlify` + - `custom` +- `externalId` +- `title` +- `url` +- `status` + - `active` + - `ready_for_review` + - `merged` + - `closed` + - `failed` + - `archived` +- `reviewState` + - `none` + - `needs_board_review` + - `approved` + - `changes_requested` +- `isPrimary` +- `healthStatus` + - `unknown` + - `healthy` + - `unhealthy` +- `summary` +- `metadata` +- `createdByRunId` +- `createdAt` +- `updatedAt` + +### Behavior + +- PRs are stored here as `type=pull_request` +- previews are stored here as `type=preview_url` or `runtime_service` +- Paperclip-owned processes should update health/status automatically +- external providers should at least store link, provider, external id, and latest known state +- 
cloud agents should be able to create work product records without Paperclip owning the execution host + +## Page and UI Model + +## 1. Global Navigation + +Do not add `Workspaces` as a top-level sidebar item in V1. + +### Motivation + +That would make the whole product feel infra-heavy, even for companies that do not use code automation. + +### Global nav remains + +- Dashboard +- Inbox +- Companies +- Agents +- Goals +- Projects +- Issues +- Approvals + +Workspaces and work product should be surfaced through project and issue detail views. + +## 2. Project Detail + +Add a project sub-navigation that keeps planning first and code second. + +### Tabs + +- `Overview` +- `Issues` +- `Code` +- `Activity` + +Optional future: + +- `Outputs` + +### `Overview` tab + +Planning-first summary: + +- project status +- goals +- lead +- issue counts +- top-level progress +- latest major work product summaries + +### `Issues` tab + +- default to top-level issues only +- show parent issue rollups: + - child count + - `x/y` done + - active preview/PR badges +- optional toggle: `Show subissues` + +### `Code` tab + +This is the main workspace configuration and visibility surface. + +#### Section: `Project Workspaces` + +List durable project workspaces for the project. 
+ +Card/list columns: + +- workspace name +- source type +- path or repo +- default ref +- primary/default badge +- active execution workspaces count +- active issue count +- active preview count +- hosting type / provider when remote-managed + +Actions: + +- `Add workspace` +- `Edit` +- `Set default` +- `Archive` + +#### Section: `Execution Defaults` + +Fields: + +- `Enable workspace automation` +- `Default issue execution mode` + - `Shared workspace` + - `Isolated workspace` + - `Operator branch` + - `Adapter default` +- `Default codebase` +- `Allow issue override` + +#### Section: `Provisioning` + +Fields: + +- `Setup command` +- `Cleanup command` +- `Implementation` + - `Shared workspace` + - `Git worktree` + - `Adapter-managed` +- `Base ref` +- `Branch naming template` +- `Derived workspace parent directory` + +Hide git-specific fields when the selected workspace is not git-backed. +Hide local-path-specific fields when the selected workspace is remote-managed. + +#### Section: `Pull Requests` + +Fields: + +- `PR workflow` + - `Disabled` + - `Manual` + - `Agent may open draft PR` + - `Approval required to open PR` + - `Approval required to mark ready` +- `Default base branch` +- `PR title template` +- `PR body template` + +#### Section: `Previews and Runtime` + +Fields: + +- `Allow workspace runtime services` +- `Default services profile` +- `Harvest owned preview URLs` +- `Track external preview URLs` + +#### Section: `Cleanup` + +Fields: + +- `Cleanup mode` + - `Manual` + - `When issue is terminal` + - `When PR closes` + - `After retention window` +- `Retention window` +- `Keep while preview is active` +- `Keep while PR is open` + +## 3. 
Add Project Workspace Flow + +Entry point: `Project > Code > Add workspace` + +### Form fields + +- `Name` +- `Source type` + - `Local folder` + - `Git repo` + - `Non-git folder` + - `Remote managed` +- `Local path` +- `Repository URL` +- `Remote provider` +- `Remote workspace reference` +- `Default ref` +- `Set as default workspace` +- `Setup command` +- `Cleanup command` + +### Behavior + +- if source type is non-git, hide branch/PR-specific setup +- if source type is git, show ref and optional advanced branch fields +- if source type is remote-managed, show provider/reference fields and hide local-path-only configuration +- for simple solo users, this can be one path field and one save button + +## 4. Issue Create Flow + +Issue creation should stay simple by default. + +### Default behavior + +If the selected project: + +- has no workspace automation: show no workspace UI +- has one default project workspace and default execution mode: inherit silently + +### Show a `Workspace` section only when relevant + +#### Basic fields + +- `Codebase` + - default selected project workspace +- `Execution mode` + - `Project default` + - `Shared workspace` + - `Isolated workspace` + - `Operator branch` + +#### Advanced-only field + +- `Reuse existing execution workspace` + +This dropdown should show only active execution workspaces for the selected project workspace, with labels like: + +- `dotta/integration-branch` +- `PAP-447-add-worktree-support` +- `shared primary workspace` + +### Important rule + +Do not show a picker containing every possible workspace object by default. + +The normal flow should feel like: + +- choose project +- optionally choose codebase +- optionally choose execution mode + +not: + +- choose from a long mixed list of roots, derived worktrees, previews, and branch names + +### Migration rule + +For existing users, issue creation should continue to look the same until a project explicitly enables richer workspace behavior. + +## 5. 
Issue Detail + +Issue detail should expose workspace and work product clearly, but without becoming a code host UI. + +### Header chips + +Show compact summary chips near the title/status area: + +- `Codebase: Web App` +- `Workspace: Shared` +- `Workspace: PAP-447-add-worktree-support` +- `PR: Open` +- `Preview: Healthy` + +### Tabs + +- `Comments` +- `Subissues` +- `Work Product` +- `Activity` + +### `Work Product` tab + +Sections: + +- `Current workspace` +- `Previews` +- `Pull requests` +- `Branches and commits` +- `Artifacts and documents` + +#### Current workspace panel + +Fields: + +- workspace name +- mode +- branch +- base ref +- last used +- linked issues count +- cleanup status + +Actions: + +- `Open workspace details` +- `Mark in review` +- `Request cleanup` + +#### Pull request cards + +Fields: + +- title +- provider +- status +- review state +- linked branch +- open/ready/merged timestamps + +Actions: + +- `Open PR` +- `Refresh status` +- `Request board review` + +#### Preview cards + +Fields: + +- title +- URL +- provider +- health +- ownership +- updated at + +Actions: + +- `Open preview` +- `Refresh` +- `Archive` + +## 6. Execution Workspace Detail + +This can be reached from a project code tab or an issue work product tab. + +It does not need to be in the main sidebar. + +### Sections + +- identity +- source issue +- linked issues +- branch/ref +- provider/session identity +- active runtime services +- previews +- PRs +- cleanup state +- event/activity history + +### Motivation + +This is where advanced users go when they need to inspect the mechanics. Most users should not need it in normal flow. + +## 7. Inbox Behavior + +Inbox should surface actionable work product events, not every implementation detail. 
+ +### Show inbox items for + +- issue assigned or updated +- PR needs board review +- PR opened or marked ready +- preview unhealthy +- workspace cleanup failed +- runtime service failed +- remote cloud-agent run returned PR or preview that needs review + +### Do not show by default + +- every workspace heartbeat +- every branch update +- every derived workspace creation + +### Display style + +If the inbox item is about a preview or PR, show issue context with it: + +- issue identifier and title +- parent issue if this is a subissue +- workspace name if relevant + +## 8. Issues List and Kanban + +Keep list and board planning-first. + +### Default behavior + +- show top-level issues by default +- show parent rollups for subissues +- do not flatten every child execution detail into the main board + +### Row/card adornments + +For issues with linked work product, show compact badges: + +- `1 PR` +- `2 previews` +- `shared workspace` +- `isolated workspace` + +### Optional advanced filters + +- `Has PR` +- `Has preview` +- `Workspace mode` +- `Codebase` + +## Upgrade and Migration Plan + +## 1. Product-level migration stance + +Migration must be silent-by-default and compatibility-preserving. + +Existing users should not be forced to: + +- create new workspace objects by hand before they can keep working +- re-tag old issues +- learn new workspace concepts before basic issue flows continue to function + +## 2. Existing project migration + +On upgrade: + +- existing `project_workspaces` records are retained and shown as `Project Workspaces` +- the current primary workspace remains the default codebase +- existing project execution workspace policy is mapped into the new `Project Execution Workspace Policy` surface +- projects with no execution workspace policy stay in compatible/shared mode + +## 3. 
Existing issue migration + +On upgrade: + +- existing issues default to `executionWorkspacePreference=inherit` +- if an issue already has execution workspace settings, map them forward directly +- if an issue has no explicit workspace data, preserve existing behavior and do not force a user-visible choice + +## 4. Existing run/runtime migration + +On upgrade: + +- active or recent runtime services can be backfilled into execution workspace history where feasible +- missing history should not block rollout; forward correctness matters more than perfect historical reconstruction + +## 5. Rollout UX + +Use additive language in the UI: + +- `Code` +- `Workspace automation` +- `Optional` +- `Advanced` + +Avoid migration copy that implies users were previously using the product "wrong". + +## Cloud Deployment Requirements + +## 1. Paperclip host and execution host must be decoupled + +Paperclip may run: + +- locally with direct filesystem access +- in a cloud app host such as Vercel +- in a hybrid setup with external job runners + +The workspace model must work in all three. + +## 2. Remote execution must support first-class work product reporting + +A cloud agent should be able to: + +- resolve a project workspace +- realize an adapter-managed execution workspace remotely +- produce a branch +- open or update a PR +- emit preview URLs +- register artifacts + +without the Paperclip host itself running local git or local preview processes. + +## 3. Local-only assumptions must be optional + +The following must be optional, not required: + +- local `cwd` +- local git CLI +- host-managed worktree directories +- host-owned long-lived preview processes + +## 4. Same product surface, different provider behavior + +The UI should not split into "local mode" and "cloud mode" products. 
+ +Instead: + +- local projects show path/git implementation details +- cloud projects show provider/reference details +- both surface the same high-level objects: + - project workspace + - execution workspace + - work product + - runtime service or preview + +## Behavior Rules + +## 1. Cleanup must not depend on agents remembering `in_review` + +Agents may still use `in_review`, but cleanup behavior must be governed by policy and observed state. + +### Keep an execution workspace alive while any of these are true + +- a linked issue is non-terminal +- a linked PR is open +- a linked preview/runtime service is active +- the workspace is still within retention window + +### Hide instead of deleting aggressively + +Archived or idle workspaces should be hidden from default lists before they are hard-cleaned up. + +## 2. Multiple issues may intentionally share one execution workspace + +This is how Paperclip supports: + +- solo dev on a shared branch +- operator integration branches +- related features batched into one PR + +This is the key reason not to force 1 issue = 1 workspace = 1 PR. + +## 3. Isolated issue workspaces remain opt-in + +Even in a git-heavy project, isolated workspaces should be optional. + +Examples where shared mode is valid: + +- tiny bug fixes +- branchless prototyping +- non-git projects +- single-user local workflows + +## 4. PR policy belongs to git-backed workspace policy + +PR automation decisions should be made at the project/workspace policy layer. + +The issue should only: + +- surface the resulting PR +- route approvals/review requests +- show status and review state + +## 5. Work product is the user-facing unifier + +Previews, PRs, commits, and artifacts should all be discoverable through one consistent issue-level affordance. + +That keeps Paperclip focused on coordination and visibility instead of splitting outputs across many hidden subsystems. + +## Recommended Implementation Order + +## Phase 1: Clarify current objects in UI + +1. 
Surface `Project > Code` tab +2. Show existing project workspaces there +3. Re-enable project-level execution workspace policy with revised copy +4. Keep issue creation simple with inherited defaults + +## Phase 2: Add explicit execution workspace record + +1. Add `execution_workspaces` +2. Link runs, issues, previews, and PRs to it +3. Add simple execution workspace detail page +4. Make `cwd` optional and ensure provider-managed remote workspaces are supported from day one + +## Phase 3: Add work product model + +1. Add `issue_work_products` +2. Ingest PRs, previews, branches, commits +3. Add issue `Work Product` tab +4. Add inbox items for actionable work product state changes +5. Support remote agent-created PR/preview reporting without local ownership + +## Phase 4: Add advanced reuse and cleanup workflows + +1. Add `reuse existing execution workspace` +2. Add cleanup lifecycle UI +3. Add operator branch workflow shortcuts +4. Add richer external preview harvesting +5. Add migration tooling/backfill where it improves continuity for existing users + +## Why This Model Is Right + +This model keeps the product balanced: + +- simple enough for solo users +- strong enough for real engineering teams +- flexible for non-git projects +- explicit enough to govern PRs and previews + +Most importantly, it keeps the abstractions clean: + +- projects plan the work +- project workspaces define the durable codebases +- execution workspaces define where work ran +- work product defines what came out of the work +- PRs remain outputs, not the core task model + +It also keeps the rollout practical: + +- existing users can upgrade without workflow breakage +- local-first installs stay simple +- cloud-hosted Paperclip deployments remain first-class + +That is a better fit for Paperclip than either extreme: + +- hiding workspace behavior until nobody understands it +- or making the whole app revolve around code-host mechanics diff --git a/doc/spec/agent-runs.md 
b/doc/spec/agent-runs.md index 4c172c7b..f0d02275 100644 --- a/doc/spec/agent-runs.md +++ b/doc/spec/agent-runs.md @@ -249,7 +249,7 @@ Runs local `claude` CLI directly. "cwd": "/absolute/or/relative/path", "promptTemplate": "You are agent {{agent.id}} ...", "model": "optional-model-id", - "maxTurnsPerRun": 80, + "maxTurnsPerRun": 300, "dangerouslySkipPermissions": true, "env": {"KEY": "VALUE"}, "extraArgs": [], diff --git a/docs/adapters/claude-local.md b/docs/adapters/claude-local.md index 3b80f288..c6029e0c 100644 --- a/docs/adapters/claude-local.md +++ b/docs/adapters/claude-local.md @@ -20,7 +20,7 @@ The `claude_local` adapter runs Anthropic's Claude Code CLI locally. It supports | `env` | object | No | Environment variables (supports secret refs) | | `timeoutSec` | number | No | Process timeout (0 = no timeout) | | `graceSec` | number | No | Grace period before force-kill | -| `maxTurnsPerRun` | number | No | Max agentic turns per heartbeat | +| `maxTurnsPerRun` | number | No | Max agentic turns per heartbeat (defaults to `300`) | | `dangerouslySkipPermissions` | boolean | No | Skip permission prompts (dev only) | ## Prompt Templates diff --git a/packages/adapter-utils/src/server-utils.ts b/packages/adapter-utils/src/server-utils.ts index 2b9de31f..30f0c9bd 100644 --- a/packages/adapter-utils/src/server-utils.ts +++ b/packages/adapter-utils/src/server-utils.ts @@ -32,6 +32,23 @@ export const runningProcesses = new Map(); export const MAX_CAPTURE_BYTES = 4 * 1024 * 1024; export const MAX_EXCERPT_BYTES = 32 * 1024; const SENSITIVE_ENV_KEY = /(key|token|secret|password|passwd|authorization|cookie)/i; +const PAPERCLIP_SKILL_ROOT_RELATIVE_CANDIDATES = [ + "../../skills", + "../../../../../skills", +]; + +export interface PaperclipSkillEntry { + name: string; + source: string; +} + +function normalizePathSlashes(value: string): string { + return value.replaceAll("\\", "/"); +} + +function isMaintainerOnlySkillTarget(candidate: string): boolean { + return 
normalizePathSlashes(candidate).includes("/.agents/skills/"); +} export function parseObject(value: unknown): Record { if (typeof value !== "object" || value === null || Array.isArray(value)) { @@ -245,6 +262,136 @@ export async function ensureAbsoluteDirectory( } } +export async function resolvePaperclipSkillsDir( + moduleDir: string, + additionalCandidates: string[] = [], +): Promise { + const candidates = [ + ...PAPERCLIP_SKILL_ROOT_RELATIVE_CANDIDATES.map((relativePath) => path.resolve(moduleDir, relativePath)), + ...additionalCandidates.map((candidate) => path.resolve(candidate)), + ]; + const seenRoots = new Set(); + + for (const root of candidates) { + if (seenRoots.has(root)) continue; + seenRoots.add(root); + const isDirectory = await fs.stat(root).then((stats) => stats.isDirectory()).catch(() => false); + if (isDirectory) return root; + } + + return null; +} + +export async function listPaperclipSkillEntries( + moduleDir: string, + additionalCandidates: string[] = [], +): Promise { + const root = await resolvePaperclipSkillsDir(moduleDir, additionalCandidates); + if (!root) return []; + + try { + const entries = await fs.readdir(root, { withFileTypes: true }); + return entries + .filter((entry) => entry.isDirectory()) + .map((entry) => ({ + name: entry.name, + source: path.join(root, entry.name), + })); + } catch { + return []; + } +} + +export async function readPaperclipSkillMarkdown( + moduleDir: string, + skillName: string, +): Promise { + const normalized = skillName.trim().toLowerCase(); + if (!normalized) return null; + + const entries = await listPaperclipSkillEntries(moduleDir); + const match = entries.find((entry) => entry.name === normalized); + if (!match) return null; + + try { + return await fs.readFile(path.join(match.source, "SKILL.md"), "utf8"); + } catch { + return null; + } +} + +export async function ensurePaperclipSkillSymlink( + source: string, + target: string, + linkSkill: (source: string, target: string) => Promise = (linkSource, 
linkTarget) => + fs.symlink(linkSource, linkTarget), +): Promise<"created" | "repaired" | "skipped"> { + const existing = await fs.lstat(target).catch(() => null); + if (!existing) { + await linkSkill(source, target); + return "created"; + } + + if (!existing.isSymbolicLink()) { + return "skipped"; + } + + const linkedPath = await fs.readlink(target).catch(() => null); + if (!linkedPath) return "skipped"; + + const resolvedLinkedPath = path.resolve(path.dirname(target), linkedPath); + if (resolvedLinkedPath === source) { + return "skipped"; + } + + const linkedPathExists = await fs.stat(resolvedLinkedPath).then(() => true).catch(() => false); + if (linkedPathExists) { + return "skipped"; + } + + await fs.unlink(target); + await linkSkill(source, target); + return "repaired"; +} + +export async function removeMaintainerOnlySkillSymlinks( + skillsHome: string, + allowedSkillNames: Iterable, +): Promise { + const allowed = new Set(Array.from(allowedSkillNames)); + try { + const entries = await fs.readdir(skillsHome, { withFileTypes: true }); + const removed: string[] = []; + for (const entry of entries) { + if (allowed.has(entry.name)) continue; + + const target = path.join(skillsHome, entry.name); + const existing = await fs.lstat(target).catch(() => null); + if (!existing?.isSymbolicLink()) continue; + + const linkedPath = await fs.readlink(target).catch(() => null); + if (!linkedPath) continue; + + const resolvedLinkedPath = path.isAbsolute(linkedPath) + ? 
linkedPath + : path.resolve(path.dirname(target), linkedPath); + if ( + !isMaintainerOnlySkillTarget(linkedPath) && + !isMaintainerOnlySkillTarget(resolvedLinkedPath) + ) { + continue; + } + + await fs.unlink(target); + removed.push(entry.name); + } + + return removed; + } catch { + return []; + } +} + export async function ensureCommandResolvable(command: string, cwd: string, env: NodeJS.ProcessEnv) { const resolved = await resolveCommandPath(command, cwd, env); if (resolved) return; diff --git a/packages/adapters/codex-local/src/server/execute.ts b/packages/adapters/codex-local/src/server/execute.ts index 3dec4ff7..c51dc8a1 100644 --- a/packages/adapters/codex-local/src/server/execute.ts +++ b/packages/adapters/codex-local/src/server/execute.ts @@ -13,17 +13,16 @@ import { redactEnvForLogs, ensureAbsoluteDirectory, ensureCommandResolvable, + ensurePaperclipSkillSymlink, ensurePathInEnv, + listPaperclipSkillEntries, + removeMaintainerOnlySkillSymlinks, renderTemplate, runChildProcess, } from "@paperclipai/adapter-utils/server-utils"; import { parseCodexJsonl, isCodexUnknownSessionError } from "./parse.js"; const __moduleDir = path.dirname(fileURLToPath(import.meta.url)); -const PAPERCLIP_SKILLS_CANDIDATES = [ - path.resolve(__moduleDir, "../../skills"), // published: /dist/server/ -> /skills/ - path.resolve(__moduleDir, "../../../../../skills"), // dev: src/server/ -> repo root/skills/ -]; const CODEX_ROLLOUT_NOISE_RE = /^\d{4}-\d{2}-\d{2}T[^\s]+\s+ERROR\s+codex_core::rollout::list:\s+state db missing rollout path for thread\s+[a-z0-9-]+$/i; @@ -67,33 +66,42 @@ function codexHomeDir(): string { return path.join(os.homedir(), ".codex"); } -async function resolvePaperclipSkillsDir(): Promise { - for (const candidate of PAPERCLIP_SKILLS_CANDIDATES) { - const isDir = await fs.stat(candidate).then((s) => s.isDirectory()).catch(() => false); - if (isDir) return candidate; - } - return null; -} +type EnsureCodexSkillsInjectedOptions = { + skillsHome?: string; + 
skillsEntries?: Awaited>; + linkSkill?: (source: string, target: string) => Promise; +}; -async function ensureCodexSkillsInjected(onLog: AdapterExecutionContext["onLog"]) { - const skillsDir = await resolvePaperclipSkillsDir(); - if (!skillsDir) return; +export async function ensureCodexSkillsInjected( + onLog: AdapterExecutionContext["onLog"], + options: EnsureCodexSkillsInjectedOptions = {}, +) { + const skillsEntries = options.skillsEntries ?? await listPaperclipSkillEntries(__moduleDir); + if (skillsEntries.length === 0) return; - const skillsHome = path.join(codexHomeDir(), "skills"); + const skillsHome = options.skillsHome ?? path.join(codexHomeDir(), "skills"); await fs.mkdir(skillsHome, { recursive: true }); - const entries = await fs.readdir(skillsDir, { withFileTypes: true }); - for (const entry of entries) { - if (!entry.isDirectory()) continue; - const source = path.join(skillsDir, entry.name); + const removedSkills = await removeMaintainerOnlySkillSymlinks( + skillsHome, + skillsEntries.map((entry) => entry.name), + ); + for (const skillName of removedSkills) { + await onLog( + "stderr", + `[paperclip] Removed maintainer-only Codex skill "${skillName}" from ${skillsHome}\n`, + ); + } + const linkSkill = options.linkSkill; + for (const entry of skillsEntries) { const target = path.join(skillsHome, entry.name); - const existing = await fs.lstat(target).catch(() => null); - if (existing) continue; try { - await fs.symlink(source, target); + const result = await ensurePaperclipSkillSymlink(entry.source, target, linkSkill); + if (result === "skipped") continue; + await onLog( "stderr", - `[paperclip] Injected Codex skill "${entry.name}" into ${skillsHome}\n`, + `[paperclip] ${result === "repaired" ? 
"Repaired" : "Injected"} Codex skill "${entry.name}" into ${skillsHome}\n`, ); } catch (err) { await onLog( diff --git a/packages/adapters/codex-local/src/server/index.ts b/packages/adapters/codex-local/src/server/index.ts index 1b8dad75..04c1e368 100644 --- a/packages/adapters/codex-local/src/server/index.ts +++ b/packages/adapters/codex-local/src/server/index.ts @@ -1,4 +1,4 @@ -export { execute } from "./execute.js"; +export { execute, ensureCodexSkillsInjected } from "./execute.js"; export { testEnvironment } from "./test.js"; export { parseCodexJsonl, isCodexUnknownSessionError } from "./parse.js"; import type { AdapterSessionCodec } from "@paperclipai/adapter-utils"; diff --git a/packages/adapters/cursor-local/src/server/execute.ts b/packages/adapters/cursor-local/src/server/execute.ts index 162ed5c6..043c3ef1 100644 --- a/packages/adapters/cursor-local/src/server/execute.ts +++ b/packages/adapters/cursor-local/src/server/execute.ts @@ -1,5 +1,4 @@ import fs from "node:fs/promises"; -import type { Dirent } from "node:fs"; import os from "node:os"; import path from "node:path"; import { fileURLToPath } from "node:url"; @@ -13,7 +12,10 @@ import { redactEnvForLogs, ensureAbsoluteDirectory, ensureCommandResolvable, + ensurePaperclipSkillSymlink, ensurePathInEnv, + listPaperclipSkillEntries, + removeMaintainerOnlySkillSymlinks, renderTemplate, runChildProcess, } from "@paperclipai/adapter-utils/server-utils"; @@ -23,10 +25,6 @@ import { normalizeCursorStreamLine } from "../shared/stream.js"; import { hasCursorTrustBypassArg } from "../shared/trust.js"; const __moduleDir = path.dirname(fileURLToPath(import.meta.url)); -const PAPERCLIP_SKILLS_CANDIDATES = [ - path.resolve(__moduleDir, "../../skills"), - path.resolve(__moduleDir, "../../../../../skills"), -]; function firstNonEmptyLine(text: string): string { return ( @@ -82,16 +80,9 @@ function cursorSkillsHome(): string { return path.join(os.homedir(), ".cursor", "skills"); } -async function 
resolvePaperclipSkillsDir(): Promise { - for (const candidate of PAPERCLIP_SKILLS_CANDIDATES) { - const isDir = await fs.stat(candidate).then((s) => s.isDirectory()).catch(() => false); - if (isDir) return candidate; - } - return null; -} - type EnsureCursorSkillsInjectedOptions = { skillsDir?: string | null; + skillsEntries?: Array<{ name: string; source: string }>; skillsHome?: string; linkSkill?: (source: string, target: string) => Promise; }; @@ -100,8 +91,13 @@ export async function ensureCursorSkillsInjected( onLog: AdapterExecutionContext["onLog"], options: EnsureCursorSkillsInjectedOptions = {}, ) { - const skillsDir = options.skillsDir ?? await resolvePaperclipSkillsDir(); - if (!skillsDir) return; + const skillsEntries = options.skillsEntries + ?? (options.skillsDir + ? (await fs.readdir(options.skillsDir, { withFileTypes: true })) + .filter((entry) => entry.isDirectory()) + .map((entry) => ({ name: entry.name, source: path.join(options.skillsDir!, entry.name) })) + : await listPaperclipSkillEntries(__moduleDir)); + if (skillsEntries.length === 0) return; const skillsHome = options.skillsHome ?? cursorSkillsHome(); try { @@ -113,31 +109,26 @@ export async function ensureCursorSkillsInjected( ); return; } - - let entries: Dirent[]; - try { - entries = await fs.readdir(skillsDir, { withFileTypes: true }); - } catch (err) { + const removedSkills = await removeMaintainerOnlySkillSymlinks( + skillsHome, + skillsEntries.map((entry) => entry.name), + ); + for (const skillName of removedSkills) { await onLog( "stderr", - `[paperclip] Failed to read Paperclip skills from ${skillsDir}: ${err instanceof Error ? err.message : String(err)}\n`, + `[paperclip] Removed maintainer-only Cursor skill "${skillName}" from ${skillsHome}\n`, ); - return; } - const linkSkill = options.linkSkill ?? 
((source: string, target: string) => fs.symlink(source, target)); - for (const entry of entries) { - if (!entry.isDirectory()) continue; - const source = path.join(skillsDir, entry.name); + for (const entry of skillsEntries) { const target = path.join(skillsHome, entry.name); - const existing = await fs.lstat(target).catch(() => null); - if (existing) continue; - try { - await linkSkill(source, target); + const result = await ensurePaperclipSkillSymlink(entry.source, target, linkSkill); + if (result === "skipped") continue; + await onLog( "stderr", - `[paperclip] Injected Cursor skill "${entry.name}" into ${skillsHome}\n`, + `[paperclip] ${result === "repaired" ? "Repaired" : "Injected"} Cursor skill "${entry.name}" into ${skillsHome}\n`, ); } catch (err) { await onLog( diff --git a/packages/adapters/gemini-local/src/server/execute.ts b/packages/adapters/gemini-local/src/server/execute.ts index 4ffb51e3..2408b425 100644 --- a/packages/adapters/gemini-local/src/server/execute.ts +++ b/packages/adapters/gemini-local/src/server/execute.ts @@ -12,7 +12,10 @@ import { buildPaperclipEnv, ensureAbsoluteDirectory, ensureCommandResolvable, + ensurePaperclipSkillSymlink, ensurePathInEnv, + listPaperclipSkillEntries, + removeMaintainerOnlySkillSymlinks, parseObject, redactEnvForLogs, renderTemplate, @@ -29,10 +32,6 @@ import { import { firstNonEmptyLine } from "./utils.js"; const __moduleDir = path.dirname(fileURLToPath(import.meta.url)); -const PAPERCLIP_SKILLS_CANDIDATES = [ - path.resolve(__moduleDir, "../../skills"), - path.resolve(__moduleDir, "../../../../../skills"), -]; function hasNonEmptyEnvValue(env: Record, key: string): boolean { const raw = env[key]; @@ -73,14 +72,6 @@ function renderApiAccessNote(env: Record): string { ].join("\n"); } -async function resolvePaperclipSkillsDir(): Promise { - for (const candidate of PAPERCLIP_SKILLS_CANDIDATES) { - const isDir = await fs.stat(candidate).then((s) => s.isDirectory()).catch(() => false); - if (isDir) return 
candidate; - } - return null; -} - function geminiSkillsHome(): string { return path.join(os.homedir(), ".gemini", "skills"); } @@ -93,8 +84,8 @@ function geminiSkillsHome(): string { async function ensureGeminiSkillsInjected( onLog: AdapterExecutionContext["onLog"], ): Promise { - const skillsDir = await resolvePaperclipSkillsDir(); - if (!skillsDir) return; + const skillsEntries = await listPaperclipSkillEntries(__moduleDir); + if (skillsEntries.length === 0) return; const skillsHome = geminiSkillsHome(); try { @@ -106,28 +97,27 @@ async function ensureGeminiSkillsInjected( ); return; } - - let entries: Dirent[]; - try { - entries = await fs.readdir(skillsDir, { withFileTypes: true }); - } catch (err) { + const removedSkills = await removeMaintainerOnlySkillSymlinks( + skillsHome, + skillsEntries.map((entry) => entry.name), + ); + for (const skillName of removedSkills) { await onLog( "stderr", - `[paperclip] Failed to read Paperclip skills from ${skillsDir}: ${err instanceof Error ? err.message : String(err)}\n`, + `[paperclip] Removed maintainer-only Gemini skill "${skillName}" from ${skillsHome}\n`, ); - return; } - for (const entry of entries) { - if (!entry.isDirectory()) continue; - const source = path.join(skillsDir, entry.name); + for (const entry of skillsEntries) { const target = path.join(skillsHome, entry.name); - const existing = await fs.lstat(target).catch(() => null); - if (existing) continue; try { - await fs.symlink(source, target); - await onLog("stderr", `[paperclip] Linked Gemini skill: ${entry.name}\n`); + const result = await ensurePaperclipSkillSymlink(entry.source, target); + if (result === "skipped") continue; + await onLog( + "stderr", + `[paperclip] ${result === "repaired" ? 
"Repaired" : "Linked"} Gemini skill: ${entry.name}\n`, + ); } catch (err) { await onLog( "stderr", diff --git a/packages/adapters/opencode-local/src/server/models.ts b/packages/adapters/opencode-local/src/server/models.ts index dd2eb2c6..a4d1a46d 100644 --- a/packages/adapters/opencode-local/src/server/models.ts +++ b/packages/adapters/opencode-local/src/server/models.ts @@ -7,6 +7,7 @@ import { } from "@paperclipai/adapter-utils/server-utils"; const MODELS_CACHE_TTL_MS = 60_000; +const MODELS_DISCOVERY_TIMEOUT_MS = 20_000; function resolveOpenCodeCommand(input: unknown): string { const envOverride = @@ -115,14 +116,14 @@ export async function discoverOpenCodeModels(input: { { cwd, env: runtimeEnv, - timeoutSec: 20, + timeoutSec: MODELS_DISCOVERY_TIMEOUT_MS / 1000, graceSec: 3, onLog: async () => {}, }, ); if (result.timedOut) { - throw new Error("`opencode models` timed out."); + throw new Error(`\`opencode models\` timed out after ${MODELS_DISCOVERY_TIMEOUT_MS / 1000}s.`); } if ((result.exitCode ?? 
1) !== 0) { const detail = firstNonEmptyLine(result.stderr) || firstNonEmptyLine(result.stdout); diff --git a/packages/adapters/pi-local/src/server/execute.ts b/packages/adapters/pi-local/src/server/execute.ts index 23cad28b..dfb1453b 100644 --- a/packages/adapters/pi-local/src/server/execute.ts +++ b/packages/adapters/pi-local/src/server/execute.ts @@ -12,7 +12,10 @@ import { redactEnvForLogs, ensureAbsoluteDirectory, ensureCommandResolvable, + ensurePaperclipSkillSymlink, ensurePathInEnv, + listPaperclipSkillEntries, + removeMaintainerOnlySkillSymlinks, renderTemplate, runChildProcess, } from "@paperclipai/adapter-utils/server-utils"; @@ -20,10 +23,6 @@ import { isPiUnknownSessionError, parsePiJsonl } from "./parse.js"; import { ensurePiModelConfiguredAndAvailable } from "./models.js"; const __moduleDir = path.dirname(fileURLToPath(import.meta.url)); -const PAPERCLIP_SKILLS_CANDIDATES = [ - path.resolve(__moduleDir, "../../skills"), - path.resolve(__moduleDir, "../../../../../skills"), -]; const PAPERCLIP_SESSIONS_DIR = path.join(os.homedir(), ".pi", "paperclips"); @@ -50,34 +49,32 @@ function parseModelId(model: string | null): string | null { return trimmed.slice(trimmed.indexOf("/") + 1).trim() || null; } -async function resolvePaperclipSkillsDir(): Promise { - for (const candidate of PAPERCLIP_SKILLS_CANDIDATES) { - const isDir = await fs.stat(candidate).then((s) => s.isDirectory()).catch(() => false); - if (isDir) return candidate; - } - return null; -} - async function ensurePiSkillsInjected(onLog: AdapterExecutionContext["onLog"]) { - const skillsDir = await resolvePaperclipSkillsDir(); - if (!skillsDir) return; + const skillsEntries = await listPaperclipSkillEntries(__moduleDir); + if (skillsEntries.length === 0) return; const piSkillsHome = path.join(os.homedir(), ".pi", "agent", "skills"); await fs.mkdir(piSkillsHome, { recursive: true }); - - const entries = await fs.readdir(skillsDir, { withFileTypes: true }); - for (const entry of entries) { - if 
(!entry.isDirectory()) continue; - const source = path.join(skillsDir, entry.name); + const removedSkills = await removeMaintainerOnlySkillSymlinks( + piSkillsHome, + skillsEntries.map((entry) => entry.name), + ); + for (const skillName of removedSkills) { + await onLog( + "stderr", + `[paperclip] Removed maintainer-only Pi skill "${skillName}" from ${piSkillsHome}\n`, + ); + } + + for (const entry of skillsEntries) { const target = path.join(piSkillsHome, entry.name); - const existing = await fs.lstat(target).catch(() => null); - if (existing) continue; try { - await fs.symlink(source, target); + const result = await ensurePaperclipSkillSymlink(entry.source, target); + if (result === "skipped") continue; await onLog( "stderr", - `[paperclip] Injected Pi skill "${entry.name}" into ${piSkillsHome}\n`, + `[paperclip] ${result === "repaired" ? "Repaired" : "Injected"} Pi skill "${entry.name}" into ${piSkillsHome}\n`, ); } catch (err) { await onLog( diff --git a/packages/db/src/client.ts b/packages/db/src/client.ts index c4275dc4..83b4aa78 100644 --- a/packages/db/src/client.ts +++ b/packages/db/src/client.ts @@ -730,7 +730,7 @@ export async function ensurePostgresDatabase( `; if (existing.length > 0) return "exists"; - await sql.unsafe(`create database "${databaseName}"`); + await sql.unsafe(`create database "${databaseName}" encoding 'UTF8' lc_collate 'C' lc_ctype 'C' template template0`); return "created"; } finally { await sql.end(); diff --git a/packages/db/src/migration-runtime.ts b/packages/db/src/migration-runtime.ts index bc90b762..e07bdf04 100644 --- a/packages/db/src/migration-runtime.ts +++ b/packages/db/src/migration-runtime.ts @@ -17,6 +17,7 @@ type EmbeddedPostgresCtor = new (opts: { password: string; port: number; persistent: boolean; + initdbFlags?: string[]; onLog?: (message: unknown) => void; onError?: (message: unknown) => void; }) => EmbeddedPostgresInstance; @@ -96,6 +97,7 @@ async function ensureEmbeddedPostgresConnection( password: "paperclip", 
port: preferredPort, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: () => {}, onError: () => {}, }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d1dd1ddc..f6820f52 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -14,6 +14,9 @@ importers: '@playwright/test': specifier: ^1.58.2 version: 1.58.2 + cross-env: + specifier: ^10.1.0 + version: 10.1.0 esbuild: specifier: ^0.27.3 version: 0.27.3 @@ -38,6 +41,9 @@ importers: '@paperclipai/adapter-cursor-local': specifier: workspace:* version: link:../packages/adapters/cursor-local + '@paperclipai/adapter-gemini-local': + specifier: workspace:* + version: link:../packages/adapters/gemini-local '@paperclipai/adapter-openclaw-gateway': specifier: workspace:* version: link:../packages/adapters/openclaw-gateway @@ -68,6 +74,9 @@ importers: drizzle-orm: specifier: 0.38.4 version: 0.38.4(@electric-sql/pglite@0.3.15)(@types/react@19.2.14)(kysely@0.28.11)(pg@8.18.0)(postgres@3.4.8)(react@19.2.4) + embedded-postgres: + specifier: ^18.1.0-beta.16 + version: 18.1.0-beta.16 picocolors: specifier: ^1.1.1 version: 1.1.1 @@ -139,6 +148,22 @@ importers: specifier: ^5.7.3 version: 5.9.3 + packages/adapters/gemini-local: + dependencies: + '@paperclipai/adapter-utils': + specifier: workspace:* + version: link:../../adapter-utils + picocolors: + specifier: ^1.1.1 + version: 1.1.1 + devDependencies: + '@types/node': + specifier: ^24.6.0 + version: 24.12.0 + typescript: + specifier: ^5.7.3 + version: 5.9.3 + packages/adapters/openclaw-gateway: dependencies: '@paperclipai/adapter-utils': @@ -245,6 +270,9 @@ importers: '@paperclipai/adapter-cursor-local': specifier: workspace:* version: link:../packages/adapters/cursor-local + '@paperclipai/adapter-gemini-local': + specifier: workspace:* + version: link:../packages/adapters/gemini-local '@paperclipai/adapter-openclaw-gateway': specifier: workspace:* version: link:../packages/adapters/openclaw-gateway @@ -321,6 +349,9 @@ importers: '@types/ws': specifier: ^8.18.1 
version: 8.18.1 + cross-env: + specifier: ^10.1.0 + version: 10.1.0 supertest: specifier: ^7.0.0 version: 7.2.2 @@ -360,6 +391,9 @@ importers: '@paperclipai/adapter-cursor-local': specifier: workspace:* version: link:../packages/adapters/cursor-local + '@paperclipai/adapter-gemini-local': + specifier: workspace:* + version: link:../packages/adapters/gemini-local '@paperclipai/adapter-openclaw-gateway': specifier: workspace:* version: link:../packages/adapters/openclaw-gateway @@ -989,6 +1023,9 @@ packages: cpu: [x64] os: [win32] + '@epic-web/invariant@1.0.0': + resolution: {integrity: sha512-lrTPqgvfFQtR/eY/qkIzp98OGdNJu0m5ji3q/nJI8v3SXkRKEnWiOxMmbvcSoAIzv/cGiuvRy57k4suKQSAdwA==} + '@esbuild-kit/core-utils@3.3.2': resolution: {integrity: sha512-sPRAnw9CdSsRmEtnsl2WXWdyquogVpB3yZ3dgwJfe8zrOzTsV7cJvmwrKVa+0ma5BoiGJ+BoqkMvawbayKUsqQ==} deprecated: 'Merged into tsx: https://tsx.is' @@ -3424,6 +3461,11 @@ packages: crelt@1.0.6: resolution: {integrity: sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==} + cross-env@10.1.0: + resolution: {integrity: sha512-GsYosgnACZTADcmEyJctkJIoqAhHjttw7RsFrVoJNXbsWWqaq6Ym+7kZjq6mS45O0jij6vtiReppKQEtqWy6Dw==} + engines: {node: '>=20'} + hasBin: true + cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -6741,6 +6783,8 @@ snapshots: '@embedded-postgres/windows-x64@18.1.0-beta.16': optional: true + '@epic-web/invariant@1.0.0': {} + '@esbuild-kit/core-utils@3.3.2': dependencies: esbuild: 0.18.20 @@ -9255,6 +9299,11 @@ snapshots: crelt@1.0.6: {} + cross-env@10.1.0: + dependencies: + '@epic-web/invariant': 1.0.0 + cross-spawn: 7.0.6 + cross-spawn@7.0.6: dependencies: path-key: 3.1.1 diff --git a/server/package.json b/server/package.json index 1dd9b073..cd30cf13 100644 --- a/server/package.json +++ b/server/package.json @@ -38,9 +38,9 @@ "@paperclipai/adapter-codex-local": 
"workspace:*", "@paperclipai/adapter-cursor-local": "workspace:*", "@paperclipai/adapter-gemini-local": "workspace:*", + "@paperclipai/adapter-openclaw-gateway": "workspace:*", "@paperclipai/adapter-opencode-local": "workspace:*", "@paperclipai/adapter-pi-local": "workspace:*", - "@paperclipai/adapter-openclaw-gateway": "workspace:*", "@paperclipai/adapter-utils": "workspace:*", "@paperclipai/db": "workspace:*", "@paperclipai/shared": "workspace:*", diff --git a/server/src/__tests__/issue-goal-fallback.test.ts b/server/src/__tests__/issue-goal-fallback.test.ts new file mode 100644 index 00000000..cae1b8ab --- /dev/null +++ b/server/src/__tests__/issue-goal-fallback.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it } from "vitest"; +import { + resolveIssueGoalId, + resolveNextIssueGoalId, +} from "../services/issue-goal-fallback.ts"; + +describe("issue goal fallback", () => { + it("assigns the company goal when creating an issue without project or goal", () => { + expect( + resolveIssueGoalId({ + projectId: null, + goalId: null, + defaultGoalId: "goal-1", + }), + ).toBe("goal-1"); + }); + + it("keeps an explicit goal when creating an issue", () => { + expect( + resolveIssueGoalId({ + projectId: null, + goalId: "goal-2", + defaultGoalId: "goal-1", + }), + ).toBe("goal-2"); + }); + + it("does not force a company goal when the issue belongs to a project", () => { + expect( + resolveIssueGoalId({ + projectId: "project-1", + goalId: null, + defaultGoalId: "goal-1", + }), + ).toBeNull(); + }); + + it("backfills the company goal on update for legacy no-project issues", () => { + expect( + resolveNextIssueGoalId({ + currentProjectId: null, + currentGoalId: null, + defaultGoalId: "goal-1", + }), + ).toBe("goal-1"); + }); + + it("clears the fallback when a project is added later", () => { + expect( + resolveNextIssueGoalId({ + currentProjectId: null, + currentGoalId: "goal-1", + projectId: "project-1", + goalId: null, + defaultGoalId: "goal-1", + }), + ).toBeNull(); + 
}); +}); diff --git a/server/src/__tests__/paperclip-skill-utils.test.ts b/server/src/__tests__/paperclip-skill-utils.test.ts new file mode 100644 index 00000000..4344dc17 --- /dev/null +++ b/server/src/__tests__/paperclip-skill-utils.test.ts @@ -0,0 +1,61 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + listPaperclipSkillEntries, + removeMaintainerOnlySkillSymlinks, +} from "@paperclipai/adapter-utils/server-utils"; + +async function makeTempDir(prefix: string): Promise { + return fs.mkdtemp(path.join(os.tmpdir(), prefix)); +} + +describe("paperclip skill utils", () => { + const cleanupDirs = new Set(); + + afterEach(async () => { + await Promise.all(Array.from(cleanupDirs).map((dir) => fs.rm(dir, { recursive: true, force: true }))); + cleanupDirs.clear(); + }); + + it("lists runtime skills from ./skills without pulling in .agents/skills", async () => { + const root = await makeTempDir("paperclip-skill-roots-"); + cleanupDirs.add(root); + + const moduleDir = path.join(root, "a", "b", "c", "d", "e"); + await fs.mkdir(moduleDir, { recursive: true }); + await fs.mkdir(path.join(root, "skills", "paperclip"), { recursive: true }); + await fs.mkdir(path.join(root, ".agents", "skills", "release"), { recursive: true }); + + const entries = await listPaperclipSkillEntries(moduleDir); + + expect(entries.map((entry) => entry.name)).toEqual(["paperclip"]); + expect(entries[0]?.source).toBe(path.join(root, "skills", "paperclip")); + }); + + it("removes stale maintainer-only symlinks from a shared skills home", async () => { + const root = await makeTempDir("paperclip-skill-cleanup-"); + cleanupDirs.add(root); + + const skillsHome = path.join(root, "skills-home"); + const runtimeSkill = path.join(root, "skills", "paperclip"); + const customSkill = path.join(root, "custom", "release-notes"); + const staleMaintainerSkill = path.join(root, ".agents", "skills", 
"release"); + + await fs.mkdir(skillsHome, { recursive: true }); + await fs.mkdir(runtimeSkill, { recursive: true }); + await fs.mkdir(customSkill, { recursive: true }); + + await fs.symlink(runtimeSkill, path.join(skillsHome, "paperclip")); + await fs.symlink(customSkill, path.join(skillsHome, "release-notes")); + await fs.symlink(staleMaintainerSkill, path.join(skillsHome, "release")); + + const removed = await removeMaintainerOnlySkillSymlinks(skillsHome, ["paperclip"]); + + expect(removed).toEqual(["release"]); + await expect(fs.lstat(path.join(skillsHome, "release"))).rejects.toThrow(); + expect((await fs.lstat(path.join(skillsHome, "paperclip"))).isSymbolicLink()).toBe(true); + expect((await fs.lstat(path.join(skillsHome, "release-notes"))).isSymbolicLink()).toBe(true); + }); +}); diff --git a/server/src/index.ts b/server/src/index.ts index c220df92..27b559eb 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -53,6 +53,7 @@ type EmbeddedPostgresCtor = new (opts: { password: string; port: number; persistent: boolean; + initdbFlags?: string[]; onLog?: (message: unknown) => void; onError?: (message: unknown) => void; }) => EmbeddedPostgresInstance; @@ -334,6 +335,7 @@ export async function startServer(): Promise { password: "paperclip", port, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: appendEmbeddedPostgresLog, onError: appendEmbeddedPostgresLog, }); @@ -512,11 +514,14 @@ export async function startServer(): Promise { if (config.heartbeatSchedulerEnabled) { const heartbeat = heartbeatService(db as any); - // Reap orphaned runs at startup (no threshold -- runningProcesses is empty) - void heartbeat.reapOrphanedRuns().catch((err) => { - logger.error({ err }, "startup reap of orphaned heartbeat runs failed"); - }); - + // Reap orphaned running runs at startup while in-memory execution state is empty, + // then resume any persisted queued runs that were waiting on the previous process. 
+ void heartbeat + .reapOrphanedRuns() + .then(() => heartbeat.resumeQueuedRuns()) + .catch((err) => { + logger.error({ err }, "startup heartbeat recovery failed"); + }); setInterval(() => { void heartbeat .tickTimers(new Date()) @@ -529,11 +534,13 @@ export async function startServer(): Promise { logger.error({ err }, "heartbeat timer tick failed"); }); - // Periodically reap orphaned runs (5-min staleness threshold) + // Periodically reap orphaned runs (5-min staleness threshold) and make sure + // persisted queued work is still being driven forward. void heartbeat .reapOrphanedRuns({ staleThresholdMs: 5 * 60 * 1000 }) + .then(() => heartbeat.resumeQueuedRuns()) .catch((err) => { - logger.error({ err }, "periodic reap of orphaned heartbeat runs failed"); + logger.error({ err }, "periodic heartbeat recovery failed"); }); }, config.heartbeatSchedulerIntervalMs); } diff --git a/server/src/routes/issues.ts b/server/src/routes/issues.ts index 9c91fec4..f02067a6 100644 --- a/server/src/routes/issues.ts +++ b/server/src/routes/issues.ts @@ -294,13 +294,24 @@ export function issueRoutes(db: Db, storage: StorageService) { const [ancestors, project, goal, mentionedProjectIds] = await Promise.all([ svc.getAncestors(issue.id), issue.projectId ? projectsSvc.getById(issue.projectId) : null, - issue.goalId ? goalsSvc.getById(issue.goalId) : null, + issue.goalId + ? goalsSvc.getById(issue.goalId) + : !issue.projectId + ? goalsSvc.getDefaultCompanyGoal(issue.companyId) + : null, svc.findMentionedProjectIds(issue.id), ]); const mentionedProjects = mentionedProjectIds.length > 0 ? await projectsSvc.listByIds(issue.companyId, mentionedProjectIds) : []; - res.json({ ...issue, ancestors, project: project ?? null, goal: goal ?? null, mentionedProjects }); + res.json({ + ...issue, + goalId: goal?.id ?? issue.goalId, + ancestors, + project: project ?? null, + goal: goal ?? 
null, + mentionedProjects, + }); }); router.post("/issues/:id/read", async (req, res) => { diff --git a/server/src/services/company-portability.ts b/server/src/services/company-portability.ts index 06928f08..f067e957 100644 --- a/server/src/services/company-portability.ts +++ b/server/src/services/company-portability.ts @@ -85,7 +85,7 @@ const ADAPTER_DEFAULT_RULES_BY_TYPE: Record; + +export async function getDefaultCompanyGoal(db: GoalReader, companyId: string) { + const activeRootGoal = await db + .select() + .from(goals) + .where( + and( + eq(goals.companyId, companyId), + eq(goals.level, "company"), + eq(goals.status, "active"), + isNull(goals.parentId), + ), + ) + .orderBy(asc(goals.createdAt)) + .then((rows) => rows[0] ?? null); + if (activeRootGoal) return activeRootGoal; + + const anyRootGoal = await db + .select() + .from(goals) + .where( + and( + eq(goals.companyId, companyId), + eq(goals.level, "company"), + isNull(goals.parentId), + ), + ) + .orderBy(asc(goals.createdAt)) + .then((rows) => rows[0] ?? null); + if (anyRootGoal) return anyRootGoal; + + return db + .select() + .from(goals) + .where(and(eq(goals.companyId, companyId), eq(goals.level, "company"))) + .orderBy(asc(goals.createdAt)) + .then((rows) => rows[0] ?? null); +} + export function goalService(db: Db) { return { list: (companyId: string) => db.select().from(goals).where(eq(goals.companyId, companyId)), @@ -13,6 +53,8 @@ export function goalService(db: Db) { .where(eq(goals.id, id)) .then((rows) => rows[0] ?? 
null), + getDefaultCompanyGoal: (companyId: string) => getDefaultCompanyGoal(db, companyId), + create: (companyId: string, data: Omit) => db .insert(goals) diff --git a/server/src/services/heartbeat.ts b/server/src/services/heartbeat.ts index e782bc25..f0665c9a 100644 --- a/server/src/services/heartbeat.ts +++ b/server/src/services/heartbeat.ts @@ -455,6 +455,7 @@ export function heartbeatService(db: Db) { const runLogStore = getRunLogStore(); const secretsSvc = secretService(db); const issuesSvc = issueService(db); + const activeRunExecutions = new Set(); async function getAgent(agentId: string) { return db @@ -959,7 +960,7 @@ export function heartbeatService(db: Db) { const reaped: string[] = []; for (const run of activeRuns) { - if (runningProcesses.has(run.id)) continue; + if (runningProcesses.has(run.id) || activeRunExecutions.has(run.id)) continue; // Apply staleness threshold to avoid false positives if (staleThresholdMs > 0) { @@ -998,6 +999,18 @@ export function heartbeatService(db: Db) { return { reaped: reaped.length, runIds: reaped }; } + async function resumeQueuedRuns() { + const queuedRuns = await db + .select({ agentId: heartbeatRuns.agentId }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.status, "queued")); + + const agentIds = [...new Set(queuedRuns.map((r) => r.agentId))]; + for (const agentId of agentIds) { + await startNextQueuedRunForAgent(agentId); + } + } + async function updateRuntimeState( agent: typeof agents.$inferSelect, run: typeof heartbeatRuns.$inferSelect, @@ -1089,6 +1102,9 @@ export function heartbeatService(db: Db) { run = claimed; } + activeRunExecutions.add(run.id); + + try { const agent = await getAgent(run.agentId); if (!agent) { await setRunStatus(runId, "failed", { @@ -1676,10 +1692,41 @@ export function heartbeatService(db: Db) { } await finalizeAgentStatus(agent.id, "failed"); - } finally { - await releaseRuntimeServicesForRun(run.id); - await startNextQueuedRunForAgent(agent.id); } + } catch (outerErr) { + // Setup 
code before adapter.execute threw (e.g. ensureRuntimeState, resolveWorkspaceForRun). + // The inner catch did not fire, so we must record the failure here. + const message = outerErr instanceof Error ? outerErr.message : "Unknown setup failure"; + logger.error({ err: outerErr, runId }, "heartbeat execution setup failed"); + await setRunStatus(runId, "failed", { + error: message, + errorCode: "adapter_failed", + finishedAt: new Date(), + }).catch(() => undefined); + await setWakeupStatus(run.wakeupRequestId, "failed", { + finishedAt: new Date(), + error: message, + }).catch(() => undefined); + const failedRun = await getRun(runId).catch(() => null); + if (failedRun) { + // Emit a run-log event so the failure is visible in the run timeline, + // consistent with what the inner catch block does for adapter failures. + await appendRunEvent(failedRun, 1, { + eventType: "error", + stream: "system", + level: "error", + message, + }).catch(() => undefined); + await releaseIssueExecutionAndPromote(failedRun).catch(() => undefined); + } + // Ensure the agent is not left stuck in "running" if the inner catch handler's + // DB calls threw (e.g. a transient DB error in finalizeAgentStatus). 
+ await finalizeAgentStatus(run.agentId, "failed").catch(() => undefined); + } finally { + await releaseRuntimeServicesForRun(run.id).catch(() => undefined); + activeRunExecutions.delete(run.id); + await startNextQueuedRunForAgent(run.agentId); + } } async function releaseIssueExecutionAndPromote(run: typeof heartbeatRuns.$inferSelect) { @@ -2425,6 +2472,8 @@ export function heartbeatService(db: Db) { reapOrphanedRuns, + resumeQueuedRuns, + tickTimers: async (now = new Date()) => { const allAgents = await db.select().from(agents); let checked = 0; diff --git a/server/src/services/issue-goal-fallback.ts b/server/src/services/issue-goal-fallback.ts new file mode 100644 index 00000000..fe48f0a1 --- /dev/null +++ b/server/src/services/issue-goal-fallback.ts @@ -0,0 +1,30 @@ +type MaybeId = string | null | undefined; + +export function resolveIssueGoalId(input: { + projectId: MaybeId; + goalId: MaybeId; + defaultGoalId: MaybeId; +}): string | null { + if (!input.projectId && !input.goalId) { + return input.defaultGoalId ?? null; + } + return input.goalId ?? null; +} + +export function resolveNextIssueGoalId(input: { + currentProjectId: MaybeId; + currentGoalId: MaybeId; + projectId?: MaybeId; + goalId?: MaybeId; + defaultGoalId: MaybeId; +}): string | null { + const projectId = + input.projectId !== undefined ? input.projectId : input.currentProjectId; + const goalId = + input.goalId !== undefined ? input.goalId : input.currentGoalId; + + if (!projectId && !goalId) { + return input.defaultGoalId ?? null; + } + return goalId ?? 
null; +} diff --git a/server/src/services/issues.ts b/server/src/services/issues.ts index 29995cd4..807a97eb 100644 --- a/server/src/services/issues.ts +++ b/server/src/services/issues.ts @@ -23,6 +23,8 @@ import { parseProjectExecutionWorkspacePolicy, } from "./execution-workspace-policy.js"; import { redactCurrentUserText } from "../log-redaction.js"; +import { resolveIssueGoalId, resolveNextIssueGoalId } from "./issue-goal-fallback.js"; +import { getDefaultCompanyGoal } from "./goals.js"; const ALL_ISSUE_STATUSES = ["backlog", "todo", "in_progress", "in_review", "blocked", "done", "cancelled"]; @@ -649,6 +651,7 @@ export function issueService(db: Db) { throw unprocessable("in_progress issues require an assignee"); } return db.transaction(async (tx) => { + const defaultCompanyGoal = await getDefaultCompanyGoal(tx, companyId); let executionWorkspaceSettings = (issueData.executionWorkspaceSettings as Record | null | undefined) ?? null; if (executionWorkspaceSettings == null && issueData.projectId) { @@ -673,6 +676,11 @@ export function issueService(db: Db) { const values = { ...issueData, + goalId: resolveIssueGoalId({ + projectId: issueData.projectId, + goalId: issueData.goalId, + defaultGoalId: defaultCompanyGoal?.id ?? null, + }), ...(executionWorkspaceSettings ? { executionWorkspaceSettings } : {}), companyId, issueNumber, @@ -752,6 +760,14 @@ export function issueService(db: Db) { } return db.transaction(async (tx) => { + const defaultCompanyGoal = await getDefaultCompanyGoal(tx, existing.companyId); + patch.goalId = resolveNextIssueGoalId({ + currentProjectId: existing.projectId, + currentGoalId: existing.goalId, + projectId: issueData.projectId, + goalId: issueData.goalId, + defaultGoalId: defaultCompanyGoal?.id ?? 
null, + }); const updated = await tx .update(issues) .set(patch) diff --git a/ui/src/App.tsx b/ui/src/App.tsx index ed6c9c51..1cfdd9df 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -1,5 +1,5 @@ import { useEffect, useRef } from "react"; -import { Navigate, Outlet, Route, Routes, useLocation } from "@/lib/router"; +import { Navigate, Outlet, Route, Routes, useLocation, useParams } from "@/lib/router"; import { useQuery } from "@tanstack/react-query"; import { Button } from "@/components/ui/button"; import { Layout } from "./components/Layout"; @@ -108,6 +108,7 @@ function boardRoutes() { <> } /> } /> + } /> } /> } /> } /> @@ -164,6 +165,57 @@ function LegacySettingsRedirect() { return ; } +function OnboardingRoutePage() { + const { companies, loading } = useCompany(); + const { onboardingOpen, openOnboarding } = useDialog(); + const { companyPrefix } = useParams<{ companyPrefix?: string }>(); + const opened = useRef(false); + const matchedCompany = companyPrefix + ? companies.find((company) => company.issuePrefix.toUpperCase() === companyPrefix.toUpperCase()) ?? null + : null; + + useEffect(() => { + if (loading || opened.current || onboardingOpen) return; + opened.current = true; + if (matchedCompany) { + openOnboarding({ initialStep: 2, companyId: matchedCompany.id }); + return; + } + openOnboarding(); + }, [companyPrefix, loading, matchedCompany, onboardingOpen, openOnboarding]); + + const title = matchedCompany + ? `Add another agent to ${matchedCompany.name}` + : companies.length > 0 + ? "Create another company" + : "Create your first company"; + const description = matchedCompany + ? "Run onboarding again to add an agent and a starter task for this company." + : companies.length > 0 + ? "Run onboarding again to create another company and seed its first agent." + : "Get started by creating a company and your first agent."; + + return ( +
+
+

{title}

+

{description}

+
+ +
+
+
+ ); +} + function CompanyRootRedirect() { const { companies, selectedCompany, loading } = useCompany(); const { onboardingOpen } = useDialog(); @@ -242,6 +294,7 @@ export function App() { }> } /> + } /> } /> }> } /> diff --git a/ui/src/adapters/claude-local/config-fields.tsx b/ui/src/adapters/claude-local/config-fields.tsx index 33d8a896..f62307ff 100644 --- a/ui/src/adapters/claude-local/config-fields.tsx +++ b/ui/src/adapters/claude-local/config-fields.tsx @@ -122,9 +122,9 @@ export function ClaudeLocalAdvancedFields({ value={eff( "adapterConfig", "maxTurnsPerRun", - Number(config.maxTurnsPerRun ?? 80), + Number(config.maxTurnsPerRun ?? 300), )} - onCommit={(v) => mark("adapterConfig", "maxTurnsPerRun", v || 80)} + onCommit={(v) => mark("adapterConfig", "maxTurnsPerRun", v || 300)} immediate className={inputClass} /> diff --git a/ui/src/components/Layout.tsx b/ui/src/components/Layout.tsx index 12cc6f88..a90efa9a 100644 --- a/ui/src/components/Layout.tsx +++ b/ui/src/components/Layout.tsx @@ -5,7 +5,6 @@ import { Link, Outlet, useLocation, useNavigate, useParams } from "@/lib/router" import { CompanyRail } from "./CompanyRail"; import { Sidebar } from "./Sidebar"; import { InstanceSidebar } from "./InstanceSidebar"; -import { SidebarNavItem } from "./SidebarNavItem"; import { BreadcrumbBar } from "./BreadcrumbBar"; import { PropertiesPanel } from "./PropertiesPanel"; import { CommandPalette } from "./CommandPalette"; @@ -248,12 +247,15 @@ export function Layout() {
- + + + Documentation + {/* Left half — form */} -
+
- {/* Progress indicators */} -
- - Get Started - - Step {step} of 4 - -
- {[1, 2, 3, 4].map((s) => ( -
- ))} -
+ {/* Progress tabs */} +
+ {( + [ + { step: 1 as Step, label: "Company", icon: Building2 }, + { step: 2 as Step, label: "Agent", icon: Bot }, + { step: 3 as Step, label: "Task", icon: ListTodo }, + { step: 4 as Step, label: "Launch", icon: Rocket } + ] as const + ).map(({ step: s, label, icon: Icon }) => ( + + ))}
{/* Step content */} @@ -593,8 +615,15 @@ export function OnboardingWizard() {

-
-