Merge public-gh/master into paperclip-issue-documents

Resolve conflicts by keeping the issue-documents work alongside upstream heartbeat-context, worktree branding, and adapter runtime updates.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-13 21:47:06 -05:00
parent 45998aa9a0 bcce5b7ec2
commit ab41fdbaee
64 changed files with 4620 additions and 292 deletions
--- a/.agents/skills/create-agent-adapter/SKILL.md
+++ b/.agents/skills/create-agent-adapter/SKILL.md
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -78,6 +78,9 @@ If you change schema/API behavior, update all impacted layers:
 4. Do not replace strategic docs wholesale unless asked.
 Prefer additive updates. Keep `doc/SPEC.md` and `doc/SPEC-implementation.md` aligned.

+5. Keep plan docs dated and centralized.
+New plan documents belong in `doc/plans/` and should use `YYYY-MM-DD-slug.md` filenames.
+
 ## 6. Database Change Workflow

 When changing data model:
--- a/cli/src/tests/agent-jwt-env.test.ts
+++ b/cli/src/tests/agent-jwt-env.test.ts
@@ -4,7 +4,9 @@ import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
 import {
  ensureAgentJwtSecret,
+  mergePaperclipEnvEntries,
  readAgentJwtSecretFromEnv,
+  readPaperclipEnvEntries,
  resolveAgentJwtEnvFile,
 } from "../config/env.js";
 import { agentJwtSecretCheck } from "../checks/agent-jwt-secret-check.js";
@@ -58,4 +60,20 @@ describe("agent jwt env helpers", () => {
    const result = agentJwtSecretCheck(configPath);
    expect(result.status).toBe("pass");
  });
+
+  it("quotes hash-prefixed env values so dotenv round-trips them", () => {
+    const configPath = tempConfigPath();
+    const envPath = resolveAgentJwtEnvFile(configPath);
+
+    mergePaperclipEnvEntries(
+      {
+        PAPERCLIP_WORKTREE_COLOR: "#439edb",
+      },
+      envPath,
+    );
+
+    const contents = fs.readFileSync(envPath, "utf-8");
+    expect(contents).toContain('PAPERCLIP_WORKTREE_COLOR="#439edb"');
+    expect(readPaperclipEnvEntries(envPath).PAPERCLIP_WORKTREE_COLOR).toBe("#439edb");
+  });
 });
--- a/cli/src/tests/worktree.test.ts
+++ b/cli/src/tests/worktree.test.ts
@@ -7,6 +7,7 @@ import {
  copyGitHooksToWorktreeGitDir,
  copySeededSecretsKey,
  rebindWorkspaceCwd,
+  resolveSourceConfigPath,
  resolveGitWorktreeAddArgs,
  resolveWorktreeMakeTargetPath,
  worktreeInitCommand,
@@ -16,6 +17,7 @@ import {
  buildWorktreeConfig,
  buildWorktreeEnvEntries,
  formatShellExports,
+  generateWorktreeColor,
  resolveWorktreeSeedPlan,
  resolveWorktreeLocalPaths,
  rewriteLocalUrlPort,
@@ -181,13 +183,22 @@ describe("worktree helpers", () => {
      path.resolve("/tmp/paperclip-worktrees", "instances", "feature-worktree-support", "data", "storage"),
    );

-    const env = buildWorktreeEnvEntries(paths);
+    const env = buildWorktreeEnvEntries(paths, {
+      name: "feature-worktree-support",
+      color: "#3abf7a",
+    });
    expect(env.PAPERCLIP_HOME).toBe(path.resolve("/tmp/paperclip-worktrees"));
    expect(env.PAPERCLIP_INSTANCE_ID).toBe("feature-worktree-support");
    expect(env.PAPERCLIP_IN_WORKTREE).toBe("true");
+    expect(env.PAPERCLIP_WORKTREE_NAME).toBe("feature-worktree-support");
+    expect(env.PAPERCLIP_WORKTREE_COLOR).toBe("#3abf7a");
    expect(formatShellExports(env)).toContain("export PAPERCLIP_INSTANCE_ID='feature-worktree-support'");
  });

+  it("generates vivid worktree colors as hex", () => {
+    expect(generateWorktreeColor()).toMatch(/^#[0-9a-f]{6}$/);
+  });
+
  it("uses minimal seed mode to keep app state but drop heavy runtime history", () => {
    const minimal = resolveWorktreeSeedPlan("minimal");
    const full = resolveWorktreeSeedPlan("full");
@@ -280,7 +291,10 @@ describe("worktree helpers", () => {
      });

      const envPath = path.join(repoRoot, ".paperclip", ".env");
-      expect(fs.readFileSync(envPath, "utf8")).toContain("PAPERCLIP_AGENT_JWT_SECRET=worktree-shared-secret");
+      const envContents = fs.readFileSync(envPath, "utf8");
+      expect(envContents).toContain("PAPERCLIP_AGENT_JWT_SECRET=worktree-shared-secret");
+      expect(envContents).toContain("PAPERCLIP_WORKTREE_NAME=repo");
+      expect(envContents).toMatch(/PAPERCLIP_WORKTREE_COLOR=\"#[0-9a-f]{6}\"/);
    } finally {
      process.chdir(originalCwd);
      if (originalJwtSecret === undefined) {
@@ -292,6 +306,59 @@ describe("worktree helpers", () => {
    }
  });

+  it("defaults the seed source config to the current repo-local Paperclip config", () => {
+    const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-source-config-"));
+    const repoRoot = path.join(tempRoot, "repo");
+    const localConfigPath = path.join(repoRoot, ".paperclip", "config.json");
+    const originalCwd = process.cwd();
+    const originalPaperclipConfig = process.env.PAPERCLIP_CONFIG;
+
+    try {
+      fs.mkdirSync(path.dirname(localConfigPath), { recursive: true });
+      fs.writeFileSync(localConfigPath, JSON.stringify(buildSourceConfig()), "utf8");
+      delete process.env.PAPERCLIP_CONFIG;
+      process.chdir(repoRoot);
+
+      expect(fs.realpathSync(resolveSourceConfigPath({}))).toBe(fs.realpathSync(localConfigPath));
+    } finally {
+      process.chdir(originalCwd);
+      if (originalPaperclipConfig === undefined) {
+        delete process.env.PAPERCLIP_CONFIG;
+      } else {
+        process.env.PAPERCLIP_CONFIG = originalPaperclipConfig;
+      }
+      fs.rmSync(tempRoot, { recursive: true, force: true });
+    }
+  });
+
+  it("preserves the source config path across worktree:make cwd changes", () => {
+    const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-source-override-"));
+    const sourceConfigPath = path.join(tempRoot, "source", "config.json");
+    const targetRoot = path.join(tempRoot, "target");
+    const originalCwd = process.cwd();
+    const originalPaperclipConfig = process.env.PAPERCLIP_CONFIG;
+
+    try {
+      fs.mkdirSync(path.dirname(sourceConfigPath), { recursive: true });
+      fs.mkdirSync(targetRoot, { recursive: true });
+      fs.writeFileSync(sourceConfigPath, JSON.stringify(buildSourceConfig()), "utf8");
+      delete process.env.PAPERCLIP_CONFIG;
+      process.chdir(targetRoot);
+
+      expect(resolveSourceConfigPath({ sourceConfigPathOverride: sourceConfigPath })).toBe(
+        path.resolve(sourceConfigPath),
+      );
+    } finally {
+      process.chdir(originalCwd);
+      if (originalPaperclipConfig === undefined) {
+        delete process.env.PAPERCLIP_CONFIG;
+      } else {
+        process.env.PAPERCLIP_CONFIG = originalPaperclipConfig;
+      }
+      fs.rmSync(tempRoot, { recursive: true, force: true });
+    }
+  });
+
  it("rebinds same-repo workspace paths onto the current worktree root", () => {
    expect(
      rebindWorkspaceCwd({
--- a/cli/src/commands/worktree-lib.ts
+++ b/cli/src/commands/worktree-lib.ts
@@ -1,3 +1,4 @@
+import { randomInt } from "node:crypto";
 import path from "node:path";
 import type { PaperclipConfig } from "../config/schema.js";
 import { expandHomePrefix } from "../config/home.js";
@@ -44,6 +45,11 @@ export type WorktreeLocalPaths = {
  storageDir: string;
 };

+export type WorktreeUiBranding = {
+  name: string;
+  color: string;
+};
+
 export function isWorktreeSeedMode(value: string): value is WorktreeSeedMode {
  return (WORKTREE_SEED_MODES as readonly string[]).includes(value);
 }
@@ -87,6 +93,51 @@ export function resolveSuggestedWorktreeName(cwd: string, explicitName?: string)
  return nonEmpty(explicitName) ?? path.basename(path.resolve(cwd));
 }

+function hslComponentToHex(n: number): string {
+  return Math.round(Math.max(0, Math.min(255, n)))
+    .toString(16)
+    .padStart(2, "0");
+}
+
+function hslToHex(hue: number, saturation: number, lightness: number): string {
+  const s = Math.max(0, Math.min(100, saturation)) / 100;
+  const l = Math.max(0, Math.min(100, lightness)) / 100;
+  const c = (1 - Math.abs((2 * l) - 1)) * s;
+  const h = ((hue % 360) + 360) % 360;
+  const x = c * (1 - Math.abs(((h / 60) % 2) - 1));
+  const m = l - (c / 2);
+
+  let r = 0;
+  let g = 0;
+  let b = 0;
+
+  if (h < 60) {
+    r = c;
+    g = x;
+  } else if (h < 120) {
+    r = x;
+    g = c;
+  } else if (h < 180) {
+    g = c;
+    b = x;
+  } else if (h < 240) {
+    g = x;
+    b = c;
+  } else if (h < 300) {
+    r = x;
+    b = c;
+  } else {
+    r = c;
+    b = x;
+  }
+
+  return `#${hslComponentToHex((r + m) * 255)}${hslComponentToHex((g + m) * 255)}${hslComponentToHex((b + m) * 255)}`;
+}
+
+export function generateWorktreeColor(): string {
+  return hslToHex(randomInt(0, 360), 68, 56);
+}
+
 export function resolveWorktreeLocalPaths(opts: {
  cwd: string;
  homeDir?: string;
@@ -196,13 +247,18 @@ export function buildWorktreeConfig(input: {
  };
 }

-export function buildWorktreeEnvEntries(paths: WorktreeLocalPaths): Record<string, string> {
+export function buildWorktreeEnvEntries(
+  paths: WorktreeLocalPaths,
+  branding?: WorktreeUiBranding,
+): Record<string, string> {
  return {
    PAPERCLIP_HOME: paths.homeDir,
    PAPERCLIP_INSTANCE_ID: paths.instanceId,
    PAPERCLIP_CONFIG: paths.configPath,
    PAPERCLIP_CONTEXT: paths.contextPath,
    PAPERCLIP_IN_WORKTREE: "true",
+    ...(branding?.name ? { PAPERCLIP_WORKTREE_NAME: branding.name } : {}),
+    ...(branding?.color ? { PAPERCLIP_WORKTREE_COLOR: branding.color } : {}),
  };
 }

--- a/cli/src/commands/worktree.ts
+++ b/cli/src/commands/worktree.ts
@@ -39,6 +39,7 @@ import {
  buildWorktreeEnvEntries,
  DEFAULT_WORKTREE_HOME,
  formatShellExports,
+  generateWorktreeColor,
  isWorktreeSeedMode,
  resolveSuggestedWorktreeName,
  resolveWorktreeSeedPlan,
@@ -55,6 +56,7 @@ type WorktreeInitOptions = {
  fromConfig?: string;
  fromDataDir?: string;
  fromInstance?: string;
+  sourceConfigPathOverride?: string;
  serverPort?: number;
  dbPort?: number;
  seed?: boolean;
@@ -425,8 +427,12 @@ async function rebindSeededProjectWorkspaces(input: {
  }
 }

-function resolveSourceConfigPath(opts: WorktreeInitOptions): string {
+export function resolveSourceConfigPath(opts: WorktreeInitOptions): string {
+  if (opts.sourceConfigPathOverride) return path.resolve(opts.sourceConfigPathOverride);
  if (opts.fromConfig) return path.resolve(opts.fromConfig);
+  if (!opts.fromDataDir && !opts.fromInstance) {
+    return resolveConfigPath();
+  }
  const sourceHome = path.resolve(expandHomePrefix(opts.fromDataDir ?? "~/.paperclip"));
  const sourceInstanceId = sanitizeWorktreeInstanceId(opts.fromInstance ?? "default");
  return path.resolve(sourceHome, "instances", sourceInstanceId, "config.json");
@@ -623,7 +629,7 @@ async function seedWorktreeDatabase(input: {

 async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
  const cwd = process.cwd();
-  const name = resolveSuggestedWorktreeName(
+  const worktreeName = resolveSuggestedWorktreeName(
    cwd,
    opts.name ?? detectGitBranchName(cwd) ?? undefined,
  );
@@ -631,12 +637,16 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
  if (!isWorktreeSeedMode(seedMode)) {
    throw new Error(`Unsupported seed mode "${seedMode}". Expected one of: minimal, full.`);
  }
-  const instanceId = sanitizeWorktreeInstanceId(opts.instance ?? name);
+  const instanceId = sanitizeWorktreeInstanceId(opts.instance ?? worktreeName);
  const paths = resolveWorktreeLocalPaths({
    cwd,
    homeDir: resolveWorktreeHome(opts.home),
    instanceId,
  });
+  const branding = {
+    name: worktreeName,
+    color: generateWorktreeColor(),
+  };
  const sourceConfigPath = resolveSourceConfigPath(opts);
  const sourceConfig = existsSync(sourceConfigPath) ? readConfig(sourceConfigPath) : null;

@@ -669,7 +679,7 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
    nonEmpty(process.env.PAPERCLIP_AGENT_JWT_SECRET);
  mergePaperclipEnvEntries(
    {
-      ...buildWorktreeEnvEntries(paths),
+      ...buildWorktreeEnvEntries(paths, branding),
      ...(existingAgentJwtSecret ? { PAPERCLIP_AGENT_JWT_SECRET: existingAgentJwtSecret } : {}),
    },
    paths.envPath,
@@ -710,6 +720,7 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
  p.log.message(pc.dim(`Repo env: ${paths.envPath}`));
  p.log.message(pc.dim(`Isolated home: ${paths.homeDir}`));
  p.log.message(pc.dim(`Instance: ${paths.instanceId}`));
+  p.log.message(pc.dim(`Worktree badge: ${branding.name} (${branding.color})`));
  p.log.message(pc.dim(`Server port: ${serverPort} | DB port: ${databasePort}`));
  if (copiedGitHooks?.copied) {
    p.log.message(
@@ -745,6 +756,7 @@ export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOpt
  const name = resolveWorktreeMakeName(nameArg);
  const startPoint = resolveWorktreeStartPoint(opts.startPoint);
  const sourceCwd = process.cwd();
+  const sourceConfigPath = resolveSourceConfigPath(opts);
  const targetPath = resolveWorktreeMakeTargetPath(name);
  if (existsSync(targetPath)) {
    throw new Error(`Target path already exists: ${targetPath}`);
@@ -804,6 +816,7 @@ export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOpt
    await runWorktreeInit({
      ...opts,
      name,
+      sourceConfigPathOverride: sourceConfigPath,
    });
  } catch (error) {
    throw error;
--- a/cli/src/config/env.ts
+++ b/cli/src/config/env.ts
@@ -22,11 +22,18 @@ function parseEnvFile(contents: string) {
  }
 }

+function formatEnvValue(value: string): string {
+  if (/^[A-Za-z0-9_./:@-]+$/.test(value)) {
+    return value;
+  }
+  return JSON.stringify(value);
+}
+
 function renderEnvFile(entries: Record<string, string>) {
  const lines = [
    "# Paperclip environment variables",
    "# Generated by Paperclip CLI commands",
-    ...Object.entries(entries).map(([key, value]) => `${key}=${value}`),
+    ...Object.entries(entries).map(([key, value]) => `${key}=${formatEnvValue(value)}`),
    "",
  ];
  return lines.join("\n");
--- a/doc/DEVELOPING.md
+++ b/doc/DEVELOPING.md
@@ -142,7 +142,7 @@ This command:
 - creates an isolated instance under `~/.paperclip-worktrees/instances/<worktree-id>/`
 - when run inside a linked git worktree, mirrors the effective git hooks into that worktree's private git dir
 - picks a free app port and embedded PostgreSQL port
- by default seeds the isolated DB in `minimal` mode from your main instance via a logical SQL snapshot
+- by default seeds the isolated DB in `minimal` mode from the current effective Paperclip instance/config (repo-local worktree config when present, otherwise the default instance) via a logical SQL snapshot

 Seed modes:

@@ -152,7 +152,13 @@ Seed modes:

 After `worktree init`, both the server and the CLI auto-load the repo-local `.paperclip/.env` when run inside that worktree, so normal commands like `pnpm dev`, `paperclipai doctor`, and `paperclipai db:backup` stay scoped to the worktree instance.

-That repo-local env also sets `PAPERCLIP_IN_WORKTREE=true`, which the server can use for worktree-specific UI behavior such as an alternate favicon.
+That repo-local env also sets:
+
+- `PAPERCLIP_IN_WORKTREE=true`
+- `PAPERCLIP_WORKTREE_NAME=<worktree-name>`
+- `PAPERCLIP_WORKTREE_COLOR="<hex-color>"` (written double-quoted so the leading `#` survives a dotenv round-trip)
+
+The server/UI use those values for worktree-specific branding such as the top banner and dynamically colored favicon.

 Print shell exports explicitly when needed:

--- a/doc/plans/2026-02-16-module-system.md
+++ b/doc/plans/2026-02-16-module-system.md
--- a/doc/plans/2026-02-18-agent-authentication-implementation.md
+++ b/doc/plans/2026-02-18-agent-authentication-implementation.md
--- a/doc/plans/2026-02-18-agent-authentication.md
+++ b/doc/plans/2026-02-18-agent-authentication.md
--- a/doc/plans/2026-02-19-agent-mgmt-followup-plan.md
+++ b/doc/plans/2026-02-19-agent-mgmt-followup-plan.md
--- a/doc/plans/2026-02-19-ceo-agent-creation-and-hiring.md
+++ b/doc/plans/2026-02-19-ceo-agent-creation-and-hiring.md
--- a/doc/plans/2026-02-20-issue-run-orchestration-plan.md
+++ b/doc/plans/2026-02-20-issue-run-orchestration-plan.md
--- a/doc/plans/2026-02-20-storage-system-implementation.md
+++ b/doc/plans/2026-02-20-storage-system-implementation.md
--- a/doc/plans/2026-02-21-humans-and-permissions-implementation.md
+++ b/doc/plans/2026-02-21-humans-and-permissions-implementation.md
--- a/doc/plans/2026-02-21-humans-and-permissions.md
+++ b/doc/plans/2026-02-21-humans-and-permissions.md
--- a/doc/plans/2026-02-23-cursor-cloud-adapter.md
+++ b/doc/plans/2026-02-23-cursor-cloud-adapter.md
--- a/doc/plans/2026-02-23-deployment-auth-mode-consolidation.md
+++ b/doc/plans/2026-02-23-deployment-auth-mode-consolidation.md
--- a/doc/plans/2026-03-10-workspace-strategy-and-git-worktrees.md
+++ b/doc/plans/2026-03-10-workspace-strategy-and-git-worktrees.md
--- a/doc/plans/2026-03-11-agent-chat-ui-and-issue-backed-conversations.md
+++ b/doc/plans/2026-03-11-agent-chat-ui-and-issue-backed-conversations.md
--- a/doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md
+++ b/doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md
@@ -0,0 +1,397 @@
+# Token Optimization Plan
+
+Date: 2026-03-13  
+Related discussion: https://github.com/paperclipai/paperclip/discussions/449
+
+## Goal
+
+Reduce token consumption materially without reducing agent capability, control-plane visibility, or task completion quality.
+
+This plan is based on:
+
+- the current V1 control-plane design
+- the current adapter and heartbeat implementation
+- the linked user discussion
+- local runtime data from the default Paperclip instance on 2026-03-13
+
+## Executive Summary
+
+The discussion is directionally right about two things:
+
+1. We should preserve session and prompt-cache locality more aggressively.
+2. We should separate stable startup instructions from per-heartbeat dynamic context.
+
+But that is not enough on its own.
+
+After reviewing the code and local run data, the token problem appears to have four distinct causes:
+
+1. **Measurement inflation on sessioned adapters.** Some token counters, especially for `codex_local`, appear to be recorded as cumulative session totals instead of per-heartbeat deltas.
+2. **Avoidable session resets.** Task sessions are intentionally reset on timer wakes and manual wakes, which destroys cache locality for common heartbeat paths.
+3. **Repeated context reacquisition.** The `paperclip` skill tells agents to re-fetch assignments, issue details, ancestors, and full comment threads on every heartbeat. The API does not currently offer efficient delta-oriented alternatives.
+4. **Large static instruction surfaces.** Agent instruction files and globally injected skills are reintroduced at startup even when most of that content is unchanged and not needed for the current task.
+
+The correct approach is:
+
+1. fix telemetry so we can trust the numbers
+2. preserve reuse where it is safe
+3. make context retrieval incremental
+4. add session compaction/rotation so long-lived sessions do not become progressively more expensive
+
+## Validated Findings
+
+### 1. Token telemetry is at least partly overstated today
+
+Observed from the local default instance:
+
+- `heartbeat_runs`: 11,360 runs between 2026-02-18 and 2026-03-13
+- summed `usage_json.inputTokens`: `2,272,142,368,952`
+- summed `usage_json.cachedInputTokens`: `2,217,501,559,420`
+
+Those totals are not credible as true per-heartbeat usage for the observed prompt sizes.
+
+Supporting evidence:
+
+- `adapter.invoke.payload.prompt` averages were small:
+  - `codex_local`: ~193 chars average, 6,067 chars max
+  - `claude_local`: ~160 chars average, 1,160 chars max
+- despite that, many `codex_local` runs report millions of input tokens
+- one reused Codex session in local data spans 3,607 runs and recorded `inputTokens` growing up to `1,155,283,166`
+
+Interpretation:
+
+- for sessioned adapters, especially Codex, we are likely storing usage reported by the runtime as a **session total**, not a **per-run delta**
+- this makes trend reporting, optimization work, and customer trust worse
+
+This does **not** mean there is no real token problem. It means we need a trustworthy baseline before we can judge optimization impact.
+
+### 2. Timer wakes currently throw away reusable task sessions
+
+In `server/src/services/heartbeat.ts`, `shouldResetTaskSessionForWake(...)` returns `true` for:
+
+- `wakeReason === "issue_assigned"`
+- `wakeSource === "timer"`
+- manual on-demand wakes
+
+That means many normal heartbeats skip saved task-session resume even when the workspace is stable.
+
+Local data supports the impact:
+
+- `timer/system` runs: 6,587 total
+- only 976 (~15%) had a previous session
+- only 963 (~15%) ended with the same session
+
+So timer wakes are the largest heartbeat path and are mostly not resuming prior task state.
+
+### 3. We repeatedly ask agents to reload the same task context
+
+The `paperclip` skill currently tells agents to do this on essentially every heartbeat:
+
+- fetch assignments
+- fetch issue details
+- fetch ancestor chain
+- fetch full issue comments
+
+Current API shape reinforces that pattern:
+
+- `GET /api/issues/:id/comments` returns the full thread
+- there is no `since`, cursor, digest, or summary endpoint for heartbeat consumption
+- `GET /api/issues/:id` returns full enriched issue context, not a minimal delta payload
+
+This is safe but expensive. It forces the model to repeatedly consume unchanged information.
+
+### 4. Static instruction payloads are not separated cleanly from dynamic heartbeat prompts
+
+The user discussion suggested a bootstrap prompt. That is the right direction.
+
+Current state:
+
+- the UI exposes `bootstrapPromptTemplate`
+- adapter execution paths do not currently use it
+- several adapters prepend `instructionsFilePath` content directly into the per-run prompt or system prompt
+
+Result:
+
+- stable instructions are re-sent or re-applied in the same path as dynamic heartbeat content
+- we are not deliberately optimizing for provider prompt caching
+
+### 5. We inject more skill surface than most agents need
+
+Local adapters inject repo skills into runtime skill directories.
+
+Important `codex_local` nuance:
+
+- Codex does not read skills directly from the active worktree.
+- Paperclip discovers repo skills from the current checkout, then symlinks them into `$CODEX_HOME/skills` or `~/.codex/skills`.
+- If an existing Paperclip skill symlink already points at another live checkout, the current implementation skips it instead of repointing it.
+- This can leave Codex using stale skill content from a different worktree even after Paperclip-side skill changes land.
+- That is both a correctness risk and a token-analysis risk, because runtime behavior may not reflect the instructions in the checkout being tested.
+
+Current repo skill sizes:
+
+- `skills/paperclip/SKILL.md`: 17,441 bytes
+- `.agents/skills/create-agent-adapter/SKILL.md`: 31,832 bytes
+- `skills/paperclip-create-agent/SKILL.md`: 4,718 bytes
+- `skills/para-memory-files/SKILL.md`: 3,978 bytes
+
+That is nearly 58 KB of skill markdown before any company-specific instructions.
+
+Not all of that is necessarily loaded into model context every run, but it increases startup surface area and should be treated as a token budget concern.
+
+## Principles
+
+We should optimize tokens under these rules:
+
+1. **Do not lose functionality.** Agents must still be able to resume work safely, understand why tasks exist, and act within governance rules.
+2. **Prefer stable context over repeated context.** Unchanged instructions should not be resent through the most expensive path.
+3. **Prefer deltas over full reloads.** Heartbeats should consume only what changed since the last useful run.
+4. **Measure normalized deltas, not raw adapter claims.** Especially for sessioned CLIs.
+5. **Keep escape hatches.** Board/manual runs may still want a forced fresh session.
+
+## Plan
+
+## Phase 1: Make token telemetry trustworthy
+
+This should happen first.
+
+### Changes
+
+- Store both:
+  - raw adapter-reported usage
+  - Paperclip-normalized per-run usage
+- For sessioned adapters, compute normalized deltas against prior usage for the same persisted session.
+- Add explicit fields for:
+  - `sessionReused`
+  - `taskSessionReused`
+  - `promptChars`
+  - `instructionsChars`
+  - `hasInstructionsFile`
+  - `skillSetHash` or skill count
+  - `contextFetchMode` (`full`, `delta`, `summary`)
+- Add per-adapter parser tests that distinguish cumulative-session counters from per-run counters.
+
+### Why
+
+Without this, we cannot tell whether a reduction came from a real optimization or a reporting artifact.
+
+### Success criteria
+
+- per-run token totals stop exploding on long-lived sessions
+- for a resumed session, raw usage is believable and monotonic at the session level, while normalized per-run usage is not double-counted
+- cost pages can show both raw and normalized numbers while we migrate
+
+## Phase 2: Preserve safe session reuse by default
+
+This is the highest-leverage behavior change.
+
+### Changes
+
+- Stop resetting task sessions on ordinary timer wakes.
+- Keep resetting on:
+  - explicit manual “fresh run” invocations
+  - assignment changes
+  - workspace mismatch
+  - model mismatch / invalid resume errors
+- Add an explicit wake flag like `forceFreshSession: true` when the board wants a reset.
+- Record why a session was reused or reset in run metadata.
+
+### Why
+
+Timer wakes are the dominant heartbeat path. Resetting them destroys both session continuity and prompt cache reuse.
+
+### Success criteria
+
+- timer wakes resume the prior task session in the large majority of stable-workspace cases
+- no increase in stale-session failures
+- lower normalized input tokens per timer heartbeat
+
+## Phase 3: Separate static bootstrap context from per-heartbeat context
+
+This is the right version of the discussion’s bootstrap idea.
+
+### Changes
+
+- Implement `bootstrapPromptTemplate` in adapter execution paths.
+- Use it only when starting a fresh session, not on resumed sessions.
+- Keep `promptTemplate` intentionally small and stable:
+  - who I am
+  - what triggered this wake
+  - which task/comment/approval to prioritize
+- Move long-lived setup text out of recurring per-run prompts where possible.
+- Add UI guidance and warnings when `promptTemplate` contains high-churn or large inline content.
+
+### Why
+
+Static instructions and dynamic wake context have different cache behavior and should be modeled separately.
+
+For `codex_local`, this also requires isolating the Codex skill home per worktree or teaching Paperclip to repoint its own skill symlinks when the source checkout changes. Otherwise prompt and skill improvements in the active worktree may not reach the running agent.
+
+### Success criteria
+
+- fresh-session prompts can remain richer without inflating every resumed heartbeat
+- resumed prompts become short and structurally stable
+- cache hit rates improve for session-preserving adapters
+
+## Phase 4: Make issue/task context incremental
+
+This is the biggest product change and likely the biggest real token saver after session reuse.
+
+### Changes
+
+Add heartbeat-oriented endpoints and skill behavior:
+
+- `GET /api/agents/me/inbox-lite`
+  - minimal assignment list
+  - issue id, identifier, status, priority, updatedAt, lastExternalCommentAt
+- `GET /api/issues/:id/heartbeat-context`
+  - compact issue state
+  - parent-chain summary
+  - latest execution summary
+  - change markers
+- `GET /api/issues/:id/comments?after=<cursor>` or `?since=<timestamp>`
+  - return only new comments
+- optional `GET /api/issues/:id/context-digest`
+  - server-generated compact summary for heartbeat use
+
+Update the `paperclip` skill so the default pattern becomes:
+
+1. fetch compact inbox
+2. fetch compact task context
+3. fetch only new comments unless this is the first read, a mention-triggered wake, or a cache miss
+4. fetch full thread only on demand
+
+### Why
+
+Today we are using full-fidelity board APIs as heartbeat APIs. That is convenient but token-inefficient.
+
+### Success criteria
+
+- after first task acquisition, most heartbeats consume only deltas
+- repeated blocked-task or long-thread work no longer replays the whole comment history
+- mention-triggered wakes still have enough context to respond correctly
+
+## Phase 5: Add session compaction and controlled rotation
+
+This protects against long-lived session bloat.
+
+### Changes
+
+- Add rotation thresholds per adapter/session:
+  - turns
+  - normalized input tokens
+  - age
+  - cache hit degradation
+- Before rotating, produce a structured carry-forward summary:
+  - current objective
+  - work completed
+  - open decisions
+  - blockers
+  - files/artifacts touched
+  - next recommended action
+- Persist that summary in task session state or runtime state.
+- Start the next session with:
+  - bootstrap prompt
+  - compact carry-forward summary
+  - current wake trigger
+
+### Why
+
+Even when reuse is desirable, some sessions become too expensive to keep alive indefinitely.
+
+### Success criteria
+
+- very long sessions stop growing without bound
+- rotating a session does not cause loss of task continuity
+- successful task completion rate stays flat or improves
+
+## Phase 6: Reduce unnecessary skill surface
+
+### Changes
+
+- Move from “inject all repo skills” to an allowlist per agent or per adapter.
+- Default local runtime skill set should likely be:
+  - `paperclip`
+- Add opt-in skills for specialized agents:
+  - `paperclip-create-agent`
+  - `para-memory-files`
+  - `create-agent-adapter`
+- Expose active skill set in agent config and run metadata.
+- For `codex_local`, either:
+  - run with a worktree-specific `CODEX_HOME`, or
+  - treat Paperclip-owned Codex skill symlinks as repairable when they point at a different checkout
+
+### Why
+
+Most agents do not need adapter-authoring or memory-system skills on every run.
+
+### Success criteria
+
+- smaller startup instruction surface
+- no loss of capability for specialist agents that explicitly need extra skills
+
+## Rollout Order
+
+Recommended order:
+
+1. telemetry normalization
+2. timer-wake session reuse
+3. bootstrap prompt implementation
+4. heartbeat delta APIs + `paperclip` skill rewrite
+5. session compaction/rotation
+6. skill allowlists
+
+## Acceptance Metrics
+
+We should treat this plan as successful only if we improve both efficiency and task outcomes.
+
+Primary metrics:
+
+- normalized input tokens per successful heartbeat
+- normalized input tokens per completed issue
+- cache-hit ratio for sessioned adapters
+- session reuse rate by invocation source
+- fraction of heartbeats that fetch full comment threads
+
+Guardrail metrics:
+
+- task completion rate
+- blocked-task rate
+- stale-session failure rate
+- manual intervention rate
+- issue reopen rate after agent completion
+
+Initial targets:
+
+- 30% to 50% reduction in normalized input tokens per successful resumed heartbeat
+- 80%+ session reuse on stable timer wakes
+- 80%+ reduction in full-thread comment reloads after first task read
+- no statistically meaningful regression in completion rate or failure rate
+
+## Concrete Engineering Tasks
+
+1. Add normalized usage fields and migration support for run analytics.
+2. Patch sessioned adapter accounting to compute deltas from prior session totals.
+3. Change `shouldResetTaskSessionForWake(...)` so timer wakes do not reset by default.
+4. Implement `bootstrapPromptTemplate` end-to-end in adapter execution.
+5. Add compact heartbeat context and incremental comment APIs.
+6. Rewrite `skills/paperclip/SKILL.md` around delta-fetch behavior.
+7. Add session rotation with carry-forward summaries.
+8. Replace global skill injection with explicit allowlists.
+9. Fix `codex_local` skill resolution so worktree-local skill changes reliably reach the runtime.
+
+## Recommendation
+
+Treat this as a two-track effort:
+
+- **Track A: correctness and no-regret wins**
+  - telemetry normalization
+  - timer-wake session reuse
+  - bootstrap prompt implementation
+- **Track B: structural token reduction**
+  - delta APIs
+  - skill rewrite
+  - session compaction
+  - skill allowlists
+
+If we only do Track A, we will improve things, but agents will still re-read too much unchanged task context.
+
+If we only do Track B without fixing telemetry first, we will not be able to prove the gains cleanly.
--- a/doc/plans/2026-03-13-agent-evals-framework.md
+++ b/doc/plans/2026-03-13-agent-evals-framework.md
@@ -0,0 +1,775 @@
+# Agent Evals Framework Plan
+
+Date: 2026-03-13
+
+## Context
+
+We need evals for the thing Paperclip actually ships:
+
+- agent behavior produced by adapter config
+- prompt templates and bootstrap prompts
+- skill sets and skill instructions
+- model choice
+- runtime policy choices that affect outcomes and cost
+
+We do **not** primarily need a fine-tuning pipeline.
+We need a regression framework that can answer:
+
+- if we change prompts or skills, do agents still do the right thing?
+- if we switch models, what got better, worse, or more expensive?
+- if we optimize tokens, did we preserve task outcomes?
+- can we grow the suite over time from real Paperclip usage?
+
+This plan is based on:
+
+- `doc/GOAL.md`
+- `doc/PRODUCT.md`
+- `doc/SPEC-implementation.md`
+- `docs/agents-runtime.md`
+- `doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md`
+- Discussion #449: <https://github.com/paperclipai/paperclip/discussions/449>
+- OpenAI eval best practices: <https://developers.openai.com/api/docs/guides/evaluation-best-practices>
+- Promptfoo docs: <https://www.promptfoo.dev/docs/configuration/test-cases/> and <https://www.promptfoo.dev/docs/providers/custom-api/>
+- LangSmith complex agent eval docs: <https://docs.langchain.com/langsmith/evaluate-complex-agent>
+- Braintrust dataset/scorer docs: <https://www.braintrust.dev/docs/annotate/datasets> and <https://www.braintrust.dev/docs/evaluate/write-scorers>
+
+## Recommendation
+
+Paperclip should take a **two-stage approach**:
+
+1. **Start with Promptfoo now** for narrow, prompt-and-skill behavior evals across models.
+2. **Grow toward a first-party, repo-local eval harness in TypeScript** for full Paperclip scenario evals.
+
+So the recommendation is no longer “skip Promptfoo.” It is:
+
+- use Promptfoo as the fastest bootstrap layer
+- keep eval cases and fixtures in this repo
+- avoid making Promptfoo config the deepest long-term abstraction
+
+More specifically:
+
+1. The canonical eval definitions should live in this repo under a top-level `evals/` directory.
+2. `v0` should use Promptfoo to run focused test cases across models and providers.
+3. The longer-term harness should run **real Paperclip scenarios** against seeded companies/issues/agents, not just raw prompt completions.
+4. The scoring model should combine:
+   - deterministic checks
+   - structured rubric scoring
+   - pairwise candidate-vs-baseline judging
+   - efficiency metrics from normalized usage/cost telemetry
+5. The framework should compare **bundles**, not just models.
+
+A bundle is:
+
+- adapter type
+- model id
+- prompt template(s)
+- bootstrap prompt template
+- skill allowlist / skill content version
+- relevant runtime flags
+
+That is the right unit because that is what actually changes behavior in Paperclip.
+
+## Why This Is The Right Shape
+
+### 1. We need to evaluate system behavior, not only prompt output
+
+Prompt-only tools are useful, but Paperclip’s real failure modes are often:
+
+- wrong issue chosen
+- wrong API call sequence
+- bad delegation
+- failure to respect approval boundaries
+- stale session behavior
+- over-reading context
+- claiming completion without producing artifacts or comments
+
+Those are control-plane behaviors. They require scenario setup, execution, and trace inspection.
+
+### 2. The repo is already TypeScript-first
+
+The existing monorepo already uses:
+
+- `pnpm`
+- `tsx`
+- `vitest`
+- TypeScript across server, UI, shared contracts, and adapters
+
+A TypeScript-first harness will fit the repo and CI better than introducing a Python-first test subsystem as the default path.
+
+Python can stay optional later for specialty scorers or research experiments.
+
+### 3. We need provider/model comparison without vendor lock-in
+
+OpenAI’s guidance is directionally right:
+
+- eval early and often
+- use task-specific evals
+- log everything
+- prefer pairwise/comparison-style judging over open-ended scoring
+
+But OpenAI’s Evals API is not the right primary control plane for Paperclip, because our target is explicitly multi-model and multi-provider.
+
+### 4. Hosted eval products are useful, and Promptfoo is the right bootstrap tool
+
+The current tradeoff:
+
+- Promptfoo is very good for local, repo-based prompt/provider matrices and CI integration.
+- LangSmith is strong on trajectory-style agent evals.
+- Braintrust has a clean dataset + scorer + experiment model and strong TypeScript support.
+
+The community suggestion is directionally right:
+
+- Promptfoo lets us start small
+- it supports simple assertions like contains / not-contains / regex / custom JS
+- it can run the same cases across multiple models
+- it supports OpenRouter
+- it can move into CI later
+
+That makes it the best `v0` tool for “did this prompt/skill/model change obviously regress?”
+
+But Paperclip should still avoid making a hosted platform or a third-party config format the core abstraction before we have our own stable eval model.
+
+The right move is:
+
+- start with Promptfoo for quick wins
+- keep the data portable and repo-owned
+- build a thin first-party harness around Paperclip concepts as the system grows
+- optionally export to or integrate with other tools later if useful
+
+## What We Should Evaluate
+
+We should split evals into four layers.
+
+### Layer 1: Deterministic contract evals
+
+These should require no judge model.
+
+Examples:
+
+- agent comments on the assigned issue
+- no mutation outside the agent’s company
+- approval-required actions do not bypass approval flow
+- task transitions are legal
+- output contains required structured fields
+- artifact links exist when the task required an artifact
+- no full-thread refetch on delta-only cases once the API supports it
+
+These are cheap, reliable, and should be the first line of defense.
+
+### Layer 2: Single-step behavior evals
+
+These test narrow behaviors in isolation.
+
+Examples:
+
+- chooses the correct issue from inbox
+- writes a reasonable first status comment
+- decides to ask for approval instead of acting directly
+- delegates to the correct report
+- recognizes blocked state and reports it clearly
+
+These are the closest thing to prompt evals, but still framed in Paperclip terms.
+
+### Layer 3: End-to-end scenario evals
+
+These run a full heartbeat or short sequence of heartbeats against a seeded scenario.
+
+Examples:
+
+- new assignment pickup
+- long-thread continuation
+- mention-triggered clarification
+- approval-gated hire request
+- manager escalation
+- workspace coding task that must leave a meaningful issue update
+
+These should evaluate both final state and trace quality.
+
+### Layer 4: Efficiency and regression evals
+
+These are not “did the answer look good?” evals. They are “did we preserve quality while improving cost/latency?” evals.
+
+Examples:
+
+- normalized input tokens per successful heartbeat
+- normalized tokens per completed issue
+- session reuse rate
+- full-thread reload rate
+- wall-clock duration
+- cost per successful scenario
+
+This layer is especially important for token optimization work.
+
+## Core Design
+
+### 1. Canonical object: `EvalCase`
+
+Each eval case should define:
+
+- scenario setup
+- target bundle(s)
+- execution mode
+- expected invariants
+- scoring rubric
+- tags/metadata
+
+Suggested shape:
+
+```ts
+type EvalCase = {
+  id: string;
+  description: string;
+  tags: string[];
+  setup: {
+    fixture: string;
+    agentId: string;
+    trigger: "assignment" | "timer" | "on_demand" | "comment" | "approval";
+  };
+  inputs?: Record<string, unknown>;
+  checks: {
+    hard: HardCheck[];
+    rubric?: RubricCheck[];
+    pairwise?: PairwiseCheck[];
+  };
+  metrics: MetricSpec[];
+};
+```
+
+The important part is that the case is about a Paperclip scenario, not a standalone prompt string.
+
+### 2. Canonical object: `EvalBundle`
+
+Suggested shape:
+
+```ts
+type EvalBundle = {
+  id: string;
+  adapter: string;
+  model: string;
+  promptTemplate: string;
+  bootstrapPromptTemplate?: string;
+  skills: string[];
+  flags?: Record<string, string | number | boolean>;
+};
+```
+
+Every comparison run should say which bundle was tested.
+
+This avoids the common mistake of saying “model X is better” when the real change was model + prompt + skills + runtime behavior.
+
+### 3. Canonical output: `EvalTrace`
+
+We should capture a normalized trace for scoring:
+
+- run ids
+- prompts actually sent
+- session reuse metadata
+- issue mutations
+- comments created
+- approvals requested
+- artifacts created
+- token/cost telemetry
+- timing
+- raw outputs
+
+The scorer layer should never need to scrape ad hoc logs.
+
+## Scoring Framework
+
+### 1. Hard checks first
+
+Every eval should start with pass/fail checks that can invalidate the run immediately.
+
+Examples:
+
+- touched wrong company
+- skipped required approval
+- no issue update produced
+- returned malformed structured output
+- marked task done without required artifact
+
+If a hard check fails, the scenario fails regardless of style or judge score.
+
+### 2. Rubric scoring second
+
+Rubric scoring should use narrow criteria, not vague “how good was this?” prompts.
+
+Good rubric dimensions:
+
+- task understanding
+- governance compliance
+- useful progress communication
+- correct delegation
+- evidence of completion
+- concision / unnecessary verbosity
+
+Each rubric dimension should be a small 0-1 or 0-2 decision, not a mushy 1-10 scale.
+
+### 3. Pairwise judging for candidate vs baseline
+
+OpenAI’s eval guidance is right that LLMs are better at discriminating between options than at open-ended generation.
+
+So for non-deterministic quality checks, the default pattern should be:
+
+- run baseline bundle on the case
+- run candidate bundle on the same case
+- ask a judge model which is better on explicit criteria
+- allow `baseline`, `candidate`, or `tie`
+
+This is better than asking a judge for an absolute quality score with no anchor.
+
+### 4. Efficiency scoring is separate
+
+Do not bury efficiency inside a single blended quality score.
+
+Record it separately:
+
+- quality score
+- cost score
+- latency score
+
+Then compute a summary decision such as:
+
+- candidate is acceptable only if quality is non-inferior and efficiency is improved
+
+That is much easier to reason about than one magic number.
+
+## Suggested Decision Rule
+
+For PR gating:
+
+1. No hard-check regressions.
+2. No significant regression on required scenario pass rate.
+3. No significant regression on key rubric dimensions.
+4. If the change is token-optimization-oriented, require efficiency improvement on target scenarios.
+
+For deeper comparison reports, show:
+
+- pass rate
+- pairwise wins/losses/ties
+- median normalized tokens
+- median wall-clock time
+- cost deltas
+
+## Dataset Strategy
+
+We should explicitly build the dataset from three sources.
+
+### 1. Hand-authored seed cases
+
+Start here.
+
+These should cover core product invariants:
+
+- assignment pickup
+- status update
+- blocked reporting
+- delegation
+- approval request
+- cross-company access denial
+- issue comment follow-up
+
+These are small, clear, and stable.
+
+### 2. Production-derived cases
+
+Per OpenAI’s guidance, we should log everything and mine real usage for eval cases.
+
+Paperclip should grow eval coverage by promoting real runs into cases when we see:
+
+- regressions
+- interesting failures
+- edge cases
+- high-value success patterns worth preserving
+
+The initial version can be manual:
+
+- take a real run
+- redact/normalize it
+- convert it into an `EvalCase`
+
+Later we can automate trace-to-case generation.
+
+### 3. Adversarial and guardrail cases
+
+These should intentionally probe failure modes:
+
+- approval bypass attempts
+- wrong-company references
+- stale context traps
+- irrelevant long threads
+- misleading instructions in comments
+- verbosity traps
+
+This is where Promptfoo-style red-team ideas can become useful later, but it is not the first slice.
+
+## Repo Layout
+
+Recommended initial layout:
+
+```text
+evals/
+  README.md
+  promptfoo/
+    promptfooconfig.yaml
+    prompts/
+    cases/
+  cases/
+    core/
+    approvals/
+    delegation/
+    efficiency/
+  fixtures/
+    companies/
+    issues/
+  bundles/
+    baseline/
+    experiments/
+  runners/
+    scenario-runner.ts
+    compare-runner.ts
+  scorers/
+    hard/
+    rubric/
+    pairwise/
+  judges/
+    rubric-judge.ts
+    pairwise-judge.ts
+  lib/
+    types.ts
+    traces.ts
+    metrics.ts
+  reports/
+    .gitignore
+```
+
+Why top-level `evals/`:
+
+- it makes evals feel first-class
+- it avoids hiding them inside `server/` when they actually span adapters and runtime behavior
+- it leaves room for both TS and optional Python helpers later
+- it gives us a clean place for Promptfoo `v0` config plus the later first-party runner
+
+## Execution Model
+
+The harness should support three modes.
+
+### Mode A: Cheap local smoke
+
+Purpose:
+
+- run on PRs
+- keep cost low
+- catch obvious regressions
+
+Characteristics:
+
+- 5 to 20 cases
+- 1 or 2 bundles
+- mostly hard checks and narrow rubrics
+
+### Mode B: Candidate vs baseline compare
+
+Purpose:
+
+- evaluate a prompt/skill/model change before merge
+
+Characteristics:
+
+- paired runs
+- pairwise judging enabled
+- quality + efficiency diff report
+
+### Mode C: Nightly broader matrix
+
+Purpose:
+
+- compare multiple models and bundles
+- grow historical benchmark data
+
+Characteristics:
+
+- larger case set
+- multiple models
+- more expensive rubric/pairwise judging
+
+## CI and Developer Workflow
+
+Suggested commands:
+
+```sh
+pnpm evals:smoke
+pnpm evals:compare --baseline baseline/codex-default --candidate experiments/codex-lean-skillset
+pnpm evals:nightly
+```
+
+PR behavior:
+
+- run `evals:smoke` on prompt/skill/adapter/runtime changes
+- optionally trigger `evals:compare` for labeled PRs or manual runs
+
+Nightly behavior:
+
+- run larger matrix
+- save report artifact
+- surface trend lines on pass rate, pairwise wins, and efficiency
+
+## Framework Comparison
+
+### Promptfoo
+
+Best use for Paperclip:
+
+- prompt-level micro-evals
+- provider/model comparison
+- quick local CI integration
+- custom JS assertions and custom providers
+- bootstrap-layer evals for one skill or one agent workflow
+
+What changed in this recommendation:
+
+- Promptfoo is now the recommended **starting point**
+- especially for “one skill, a handful of cases, compare across models”
+
+Why it still should not be the only long-term system:
+
+- its primary abstraction is still prompt/provider/test-case oriented
+- Paperclip needs scenario setup, control-plane state inspection, and multi-step traces as first-class concepts
+
+Recommendation:
+
+- use Promptfoo first
+- store Promptfoo config and cases in-repo under `evals/promptfoo/`
+- use custom JS/TS assertions and, if needed later, a custom provider that calls Paperclip scenario runners
+- do not make Promptfoo YAML the only canonical Paperclip eval format once we outgrow prompt-level evals
+
+### LangSmith
+
+What it gets right:
+
+- final response evals
+- trajectory evals
+- single-step evals
+
+Why not the primary system today:
+
+- stronger fit for teams already centered on LangChain/LangGraph
+- introduces hosted/external workflow gravity before our own eval model is stable
+
+Recommendation:
+
+- copy the trajectory/final/single-step taxonomy
+- do not adopt the platform as the default requirement
+
+### Braintrust
+
+What it gets right:
+
+- TypeScript support
+- clean dataset/task/scorer model
+- production logging to datasets
+- experiment comparison over time
+
+Why not the primary system today:
+
+- still externalizes the canonical dataset and review workflow
+- we are not yet at the maturity where hosted experiment management should define the shape of the system
+
+Recommendation:
+
+- borrow its dataset/scorer/experiment mental model
+- revisit once we want hosted review and experiment history at scale
+
+### OpenAI Evals / Evals API
+
+What it gets right:
+
+- strong eval principles
+- emphasis on task-specific evals
+- continuous evaluation mindset
+
+Why not the primary system:
+
+- Paperclip must compare across models/providers
+- we do not want our primary eval runner coupled to one model vendor
+
+Recommendation:
+
+- use the guidance
+- do not use it as the core Paperclip eval runtime
+
+## First Implementation Slice
+
+The first version should be intentionally small.
+
+### Phase 0: Promptfoo bootstrap
+
+Build:
+
+- `evals/promptfoo/promptfooconfig.yaml`
+- 5 to 10 focused cases for one skill or one agent workflow
+- model matrix using the providers we care about most
+- mostly deterministic assertions:
+  - contains
+  - not-contains
+  - regex
+  - custom JS assertions
+
+Target scope:
+
+- one skill, or one narrow workflow such as assignment pickup / first status update
+- compare a small set of bundles across several models
+
+Success criteria:
+
+- we can run one command and compare outputs across models
+- prompt/skill regressions become visible quickly
+- the team gets signal before building heavier infrastructure
+
+### Phase 1: Skeleton and core cases
+
+Build:
+
+- `evals/` scaffold
+- `EvalCase`, `EvalBundle`, `EvalTrace` types
+- scenario runner for seeded local cases
+- 10 hand-authored core cases
+- hard checks only
+
+Target cases:
+
+- assigned issue pickup
+- write progress comment
+- ask for approval when required
+- respect company boundary
+- report blocked state
+- avoid marking done without artifact/comment evidence
+
+Success criteria:
+
+- a developer can run a local smoke suite
+- prompt/skill changes can fail the suite deterministically
+- Promptfoo `v0` cases either migrate into or coexist with this layer cleanly
+
+### Phase 2: Pairwise and rubric layer
+
+Build:
+
+- rubric scorer interface
+- pairwise judge runner
+- candidate vs baseline compare command
+- markdown/html report output
+
+Success criteria:
+
+- model/prompt bundle changes produce a readable diff report
+- we can tell “better”, “worse”, or “same” on curated scenarios
+
+### Phase 3: Efficiency integration
+
+Build:
+
+- normalized token/cost metrics into eval traces
+- cost and latency comparisons
+- efficiency gates for token optimization work
+
+Dependency:
+
+- this should align with the telemetry normalization work in `doc/plans/2026-03-13-TOKEN-OPTIMIZATION-PLAN.md`
+
+Success criteria:
+
+- quality and efficiency can be judged together
+- token-reduction work no longer relies on anecdotal improvements
+
+### Phase 4: Production-case ingestion
+
+Build:
+
+- tooling to promote real runs into new eval cases
+- metadata tagging
+- failure corpus growth process
+
+Success criteria:
+
+- the eval suite grows from real product behavior instead of staying synthetic
+
+## Initial Case Categories
+
+We should start with these categories:
+
+1. `core.assignment_pickup`
+2. `core.progress_update`
+3. `core.blocked_reporting`
+4. `governance.approval_required`
+5. `governance.company_boundary`
+6. `delegation.correct_report`
+7. `threads.long_context_followup`
+8. `efficiency.no_unnecessary_reloads`
+
+That is enough to start catching the classes of regressions we actually care about.
+
+## Important Guardrails
+
+### 1. Do not rely on judge models alone
+
+Every important scenario needs deterministic checks first.
+
+### 2. Do not gate PRs on a single noisy score
+
+Use pass/fail invariants plus a small number of stable rubric or pairwise checks.
+
+### 3. Do not confuse benchmark score with product quality
+
+The suite must keep growing from real runs, otherwise it will become a toy benchmark.
+
+### 4. Do not evaluate only final output
+
+Trajectory matters for agents:
+
+- did they call the right Paperclip APIs?
+- did they ask for approval?
+- did they communicate progress?
+- did they choose the right issue?
+
+### 5. Do not make the framework vendor-shaped
+
+Our eval model should survive changes in:
+
+- judge provider
+- candidate provider
+- adapter implementation
+- hosted tooling choices
+
+## Open Questions
+
+1. Should the first scenario runner invoke the real server over HTTP, or call services directly in-process?
+   My recommendation: start in-process for speed, then add HTTP-mode coverage once the eval model stabilizes.
+
+2. Should we support Python scorers in v1?
+   My recommendation: no. Keep v1 all-TypeScript.
+
+3. Should we commit baseline outputs?
+   My recommendation: no. Commit case definitions and bundle definitions, but keep run artifacts out of git.
+
+4. Should we add hosted experiment tracking immediately?
+   My recommendation: no. Revisit after the local harness proves useful.
+
+## Final Recommendation
+
+Start with Promptfoo for immediate, narrow model-and-prompt comparisons, then grow into a first-party `evals/` framework in TypeScript that evaluates **Paperclip scenarios and bundles**, not just prompts.
+
+Use this structure:
+
+- Promptfoo for `v0` bootstrap
+- deterministic hard checks as the foundation
+- rubric and pairwise judging for non-deterministic quality
+- normalized efficiency metrics as a separate axis
+- repo-local datasets that grow from real runs
+
+Use external tools selectively:
+
+- Promptfoo as the initial path for narrow prompt/provider tests
+- Braintrust or LangSmith later if we want hosted experiment management
+
+But keep the canonical eval model inside the Paperclip repo and aligned to Paperclip’s actual control-plane behaviors.
--- a/doc/plans/2026-03-13-paperclip-skill-tightening-plan.md
+++ b/doc/plans/2026-03-13-paperclip-skill-tightening-plan.md
@@ -0,0 +1,186 @@
+# Paperclip Skill Tightening Plan
+
+## Status
+
+Deferred follow-up. Do not include in the current token-optimization PR beyond documenting the plan.
+
+## Why This Is Deferred
+
+The `paperclip` skill is part of the critical control-plane safety surface. Tightening it may reduce fresh-session token use, but it also carries prompt-regression risk. We do not yet have evals that would let us safely prove behavior preservation across assignment handling, checkout rules, comment etiquette, approval workflows, and escalation paths.
+
+The current PR should ship the lower-risk infrastructure wins first:
+
+- telemetry normalization
+- safe session reuse
+- incremental issue/comment context
+- bootstrap versus heartbeat prompt separation
+- Codex worktree isolation
+
+## Current Problem
+
+Fresh runs still spend substantial input tokens even after the context-path fixes. The remaining large startup cost appears to come from loading the full `paperclip` skill and related instruction surface into context at run start.
+
+The skill currently mixes three kinds of content in one file:
+
+- hot-path heartbeat procedure used on nearly every run
+- critical policy and safety invariants
+- rare workflow/reference material that most runs do not need
+
+That structure is safe but expensive.
+
+## Goals
+
+- reduce first-run instruction tokens without weakening agent safety
+- preserve all current Paperclip control-plane capabilities
+- keep common heartbeat behavior explicit and easy for agents to follow
+- move rare workflows and reference material out of the hot path
+- create a structure that can later be evaluated systematically
+
+## Non-Goals
+
+- changing Paperclip API semantics
+- removing required governance rules
+- deleting rare workflows
+- changing agent defaults in the current PR
+
+## Recommended Direction
+
+### 1. Split Hot Path From Lookup Material
+
+Restructure the skill into:
+
+- an always-loaded core section for the common heartbeat loop
+- on-demand material for infrequent workflows and deep reference
+
+The core should cover only what is needed on nearly every wake:
+
+- auth and required headers
+- inbox-first assignment retrieval
+- mandatory checkout behavior
+- fetching `heartbeat-context` first
+- incremental comment retrieval rules
+- mention/self-assign exception
+- blocked-task dedup
+- status/comment/release expectations before exit
+
+### 2. Normalize The Skill Around One Canonical Procedure
+
+The same rules are currently expressed multiple times across:
+
+- heartbeat steps
+- critical rules
+- endpoint reference
+- workflow examples
+
+Refactor so each operational fact has one primary home:
+
+- procedure
+- invariant list
+- appendix/reference
+
+This reduces prompt weight and lowers the chance of internal instruction drift.
+
+### 3. Compress Prose Into High-Signal Instruction Forms
+
+Rewrite the hot path using compact operational forms:
+
+- short ordered checklist
+- flat invariant list
+- minimal examples only where ambiguity would be risky
+
+Reduce:
+
+- narrative explanation
+- repeated warnings already covered elsewhere
+- large example payloads for common operations
+- long endpoint matrices in the main body
+
+### 4. Move Rare Workflows Behind Explicit Triggers
+
+These workflows should remain available but should not dominate fresh-run context:
+
+- OpenClaw invite flow
+- project setup flow
+- planning `<plan/>` writeback flow
+- instructions-path update flow
+- detailed link-formatting examples
+
+Recommended approach:
+
+- keep a short pointer in the main skill
+- move detailed procedures into sibling skills or referenced docs that agents read only when needed
+
+### 5. Separate Policy From Reference
+
+The skill should distinguish:
+
+- mandatory operating rules
+- endpoint lookup/reference
+- business-process playbooks
+
+That separation makes it easier to evaluate prompt changes later and lets adapters or orchestration choose what must always be loaded.
+
+## Proposed Target Structure
+
+1. Purpose and authentication
+2. Compact heartbeat procedure
+3. Hard invariants
+4. Required comment/update style
+5. Triggered workflow index
+6. Appendix/reference
+
+## Rollout Plan
+
+### Phase 1. Inventory And Measure
+
+- annotate the current skill by section and estimate token weight
+- identify which sections are truly hot-path versus rare
+- capture representative runs to compare before/after prompt size and behavior
+
+### Phase 2. Structural Refactor Without Semantic Changes
+
+- rewrite the main skill into the target structure
+- preserve all existing rules and capabilities
+- move rare workflow details into referenced companion material
+- keep wording changes conservative
+
+### Phase 3. Validate Against Real Scenarios
+
+Run scenario checks for:
+
+- normal assigned heartbeat
+- comment-triggered wake
+- blocked-task dedup behavior
+- approval-resolution wake
+- delegation/subtask creation
+- board handoff back to user
+- plan-request handling
+
+### Phase 4. Decide Default Loading Strategy
+
+After validation, decide whether:
+
+- the entire main skill still loads by default, or
+- only the compact core loads by default and rare sections are fetched on demand
+
+Do not change this loading policy without validation.
+
+## Risks
+
+- prompt degradation on control-plane safety rules
+- agents forgetting rare but important workflows
+- accidental removal of repeated wording that was carrying useful behavior
+- introducing ambiguous instruction precedence between the core skill and companion materials
+
+## Preconditions Before Implementation
+
+- define acceptance scenarios for control-plane correctness
+- add at least lightweight eval or scripted scenario coverage for key Paperclip flows
+- confirm how adapter/bootstrap layering should load skill content versus references
+
+## Success Criteria
+
+- materially lower first-run input tokens for Paperclip-coordinated agents
+- no regression in checkout discipline, issue updates, blocked handling, or delegation
+- no increase in malformed API usage or ownership mistakes
+- agents still complete rare workflows correctly when explicitly asked
--- a/doc/plans/2026-03-13-workspace-product-model-and-work-product.md
+++ b/doc/plans/2026-03-13-workspace-product-model-and-work-product.md
--- a/docs/adapters/codex-local.md
+++ b/docs/adapters/codex-local.md
@@ -30,6 +30,8 @@ Codex uses `previous_response_id` for session continuity. The adapter serializes

 The adapter symlinks Paperclip skills into the global Codex skills directory (`~/.codex/skills`). Existing user skills are not overwritten.

+When Paperclip is running inside a managed worktree instance (`PAPERCLIP_IN_WORKTREE=true`), the adapter instead uses a worktree-isolated `CODEX_HOME` under the Paperclip instance so Codex skills, sessions, logs, and other runtime state do not leak across checkouts. It seeds that isolated home from the user's main Codex home for shared auth/config continuity.
+
 For manual local CLI usage outside heartbeat runs (for example running as `codexcoder` directly), use:

 ```sh
--- a/docs/adapters/creating-an-adapter.md
+++ b/docs/adapters/creating-an-adapter.md
@@ -6,7 +6,7 @@ summary: Guide to building a custom adapter
 Build a custom adapter to connect Paperclip to any agent runtime.

 <Tip>
-If you're using Claude Code, the `create-agent-adapter` skill can guide you through the full adapter creation process interactively. Just ask Claude to create a new adapter and it will walk you through each step.
+If you're using Claude Code, the `.agents/skills/create-agent-adapter` skill can guide you through the full adapter creation process interactively. Just ask Claude to create a new adapter and it will walk you through each step.
 </Tip>

 ## Package Structure
--- a/packages/adapter-utils/src/server-utils.ts
+++ b/packages/adapter-utils/src/server-utils.ts
@@ -112,6 +112,16 @@ export function renderTemplate(template: string, data: Record<string, unknown>)
  return template.replace(/{{\s*([a-zA-Z0-9_.-]+)\s*}}/g, (_, path) => resolvePathValue(data, path));
 }

+export function joinPromptSections(
+  sections: Array<string | null | undefined>,
+  separator = "\n\n",
+) {
+  return sections
+    .map((value) => (typeof value === "string" ? value.trim() : ""))
+    .filter(Boolean)
+    .join(separator);
+}
+
 export function redactEnvForLogs(env: Record<string, string>): Record<string, string> {
  const redacted: Record<string, string> = {};
  for (const [key, value] of Object.entries(env)) {
--- a/packages/adapter-utils/src/types.ts
+++ b/packages/adapter-utils/src/types.ts
@@ -99,6 +99,7 @@ export interface AdapterInvocationMeta {
  commandNotes?: string[];
  env?: Record<string, string>;
  prompt?: string;
+  promptMetrics?: Record<string, number>;
  context?: Record<string, unknown>;
 }

--- a/packages/adapters/claude-local/src/server/execute.ts
+++ b/packages/adapters/claude-local/src/server/execute.ts
@@ -12,6 +12,7 @@ import {
  parseObject,
  parseJson,
  buildPaperclipEnv,
+  joinPromptSections,
  redactEnvForLogs,
  ensureAbsoluteDirectory,
  ensureCommandResolvable,
@@ -363,7 +364,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
      `[paperclip] Claude session "${runtimeSessionId}" was saved for cwd "${runtimeSessionCwd}" and will not be resumed in "${cwd}".\n`,
    );
  }
-  const prompt = renderTemplate(promptTemplate, {
+  const bootstrapPromptTemplate = asString(config.bootstrapPromptTemplate, "");
+  const templateData = {
    agentId: agent.id,
    companyId: agent.companyId,
    runId,
@@ -371,7 +373,24 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    agent,
    run: { id: runId, source: "on_demand" },
    context,
-  });
+  };
+  const renderedPrompt = renderTemplate(promptTemplate, templateData);
+  const renderedBootstrapPrompt =
+    !sessionId && bootstrapPromptTemplate.trim().length > 0
+      ? renderTemplate(bootstrapPromptTemplate, templateData).trim()
+      : "";
+  const sessionHandoffNote = asString(context.paperclipSessionHandoffMarkdown, "").trim();
+  const prompt = joinPromptSections([
+    renderedBootstrapPrompt,
+    sessionHandoffNote,
+    renderedPrompt,
+  ]);
+  const promptMetrics = {
+    promptChars: prompt.length,
+    bootstrapPromptChars: renderedBootstrapPrompt.length,
+    sessionHandoffChars: sessionHandoffNote.length,
+    heartbeatPromptChars: renderedPrompt.length,
+  };

  const buildClaudeArgs = (resumeSessionId: string | null) => {
    const args = ["--print", "-", "--output-format", "stream-json", "--verbose"];
@@ -416,6 +435,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        commandNotes,
        env: redactEnvForLogs(env),
        prompt,
+        promptMetrics,
        context,
      });
    }
--- a/packages/adapters/claude-local/src/ui/build-config.ts
+++ b/packages/adapters/claude-local/src/ui/build-config.ts
@@ -67,6 +67,7 @@ export function buildClaudeLocalConfig(v: CreateConfigValues): Record<string, un
  if (v.cwd) ac.cwd = v.cwd;
  if (v.instructionsFilePath) ac.instructionsFilePath = v.instructionsFilePath;
  if (v.promptTemplate) ac.promptTemplate = v.promptTemplate;
+  if (v.bootstrapPrompt) ac.bootstrapPromptTemplate = v.bootstrapPrompt;
  if (v.model) ac.model = v.model;
  if (v.thinkingEffort) ac.effort = v.thinkingEffort;
  if (v.chrome) ac.chrome = true;
--- a/packages/adapters/codex-local/src/server/codex-home.ts
+++ b/packages/adapters/codex-local/src/server/codex-home.ts
@@ -0,0 +1,101 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
+
+const TRUTHY_ENV_RE = /^(1|true|yes|on)$/i;
+const COPIED_SHARED_FILES = ["config.json", "config.toml", "instructions.md"] as const;
+const SYMLINKED_SHARED_FILES = ["auth.json"] as const;
+
+function nonEmpty(value: string | undefined): string | null {
+  return typeof value === "string" && value.trim().length > 0 ? value.trim() : null;
+}
+
+export async function pathExists(candidate: string): Promise<boolean> {
+  return fs.access(candidate).then(() => true).catch(() => false);
+}
+
+export function resolveCodexHomeDir(env: NodeJS.ProcessEnv = process.env): string {
+  const fromEnv = nonEmpty(env.CODEX_HOME);
+  if (fromEnv) return path.resolve(fromEnv);
+  return path.join(os.homedir(), ".codex");
+}
+
+function isWorktreeMode(env: NodeJS.ProcessEnv): boolean {
+  return TRUTHY_ENV_RE.test(env.PAPERCLIP_IN_WORKTREE ?? "");
+}
+
+function resolveWorktreeCodexHomeDir(env: NodeJS.ProcessEnv): string | null {
+  if (!isWorktreeMode(env)) return null;
+  const paperclipHome = nonEmpty(env.PAPERCLIP_HOME);
+  if (!paperclipHome) return null;
+  const instanceId = nonEmpty(env.PAPERCLIP_INSTANCE_ID);
+  if (instanceId) {
+    return path.resolve(paperclipHome, "instances", instanceId, "codex-home");
+  }
+  return path.resolve(paperclipHome, "codex-home");
+}
+
+async function ensureParentDir(target: string): Promise<void> {
+  await fs.mkdir(path.dirname(target), { recursive: true });
+}
+
+async function ensureSymlink(target: string, source: string): Promise<void> {
+  const existing = await fs.lstat(target).catch(() => null);
+  if (!existing) {
+    await ensureParentDir(target);
+    await fs.symlink(source, target);
+    return;
+  }
+
+  if (!existing.isSymbolicLink()) {
+    return;
+  }
+
+  const linkedPath = await fs.readlink(target).catch(() => null);
+  if (!linkedPath) return;
+
+  const resolvedLinkedPath = path.resolve(path.dirname(target), linkedPath);
+  if (resolvedLinkedPath === source) return;
+
+  await fs.unlink(target);
+  await fs.symlink(source, target);
+}
+
+async function ensureCopiedFile(target: string, source: string): Promise<void> {
+  const existing = await fs.lstat(target).catch(() => null);
+  if (existing) return;
+  await ensureParentDir(target);
+  await fs.copyFile(source, target);
+}
+
+export async function prepareWorktreeCodexHome(
+  env: NodeJS.ProcessEnv,
+  onLog: AdapterExecutionContext["onLog"],
+): Promise<string | null> {
+  const targetHome = resolveWorktreeCodexHomeDir(env);
+  if (!targetHome) return null;
+
+  const sourceHome = resolveCodexHomeDir(env);
+  if (path.resolve(sourceHome) === path.resolve(targetHome)) return targetHome;
+
+  await fs.mkdir(targetHome, { recursive: true });
+
+  for (const name of SYMLINKED_SHARED_FILES) {
+    const source = path.join(sourceHome, name);
+    if (!(await pathExists(source))) continue;
+    await ensureSymlink(path.join(targetHome, name), source);
+  }
+
+  for (const name of COPIED_SHARED_FILES) {
+    const source = path.join(sourceHome, name);
+    if (!(await pathExists(source))) continue;
+    await ensureCopiedFile(path.join(targetHome, name), source);
+  }
+
+  await onLog(
+    "stderr",
+    `[paperclip] Using worktree-isolated Codex home "${targetHome}" (seeded from "${sourceHome}").\n`,
+  );
+  return targetHome;
+}
--- a/packages/adapters/codex-local/src/server/execute.ts
+++ b/packages/adapters/codex-local/src/server/execute.ts
@@ -1,5 +1,4 @@
 import fs from "node:fs/promises";
-import os from "node:os";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
 import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
@@ -18,9 +17,11 @@ import {
  listPaperclipSkillEntries,
  removeMaintainerOnlySkillSymlinks,
  renderTemplate,
+  joinPromptSections,
  runChildProcess,
 } from "@paperclipai/adapter-utils/server-utils";
 import { parseCodexJsonl, isCodexUnknownSessionError } from "./parse.js";
+import { pathExists, prepareWorktreeCodexHome, resolveCodexHomeDir } from "./codex-home.js";

 const __moduleDir = path.dirname(fileURLToPath(import.meta.url));
 const CODEX_ROLLOUT_NOISE_RE =
@@ -60,10 +61,32 @@ function resolveCodexBillingType(env: Record<string, string>): "api" | "subscrip
  return hasNonEmptyEnvValue(env, "OPENAI_API_KEY") ? "api" : "subscription";
 }

-function codexHomeDir(): string {
-  const fromEnv = process.env.CODEX_HOME;
-  if (typeof fromEnv === "string" && fromEnv.trim().length > 0) return fromEnv.trim();
-  return path.join(os.homedir(), ".codex");
+async function isLikelyPaperclipRepoRoot(candidate: string): Promise<boolean> {
+  const [hasWorkspace, hasPackageJson, hasServerDir, hasAdapterUtilsDir] = await Promise.all([
+    pathExists(path.join(candidate, "pnpm-workspace.yaml")),
+    pathExists(path.join(candidate, "package.json")),
+    pathExists(path.join(candidate, "server")),
+    pathExists(path.join(candidate, "packages", "adapter-utils")),
+  ]);
+
+  return hasWorkspace && hasPackageJson && hasServerDir && hasAdapterUtilsDir;
+}
+
+async function isLikelyPaperclipRuntimeSkillSource(candidate: string, skillName: string): Promise<boolean> {
+  if (path.basename(candidate) !== skillName) return false;
+  const skillsRoot = path.dirname(candidate);
+  if (path.basename(skillsRoot) !== "skills") return false;
+  if (!(await pathExists(path.join(candidate, "SKILL.md")))) return false;
+
+  let cursor = path.dirname(skillsRoot);
+  for (let depth = 0; depth < 6; depth += 1) {
+    if (await isLikelyPaperclipRepoRoot(cursor)) return true;
+    const parent = path.dirname(cursor);
+    if (parent === cursor) break;
+    cursor = parent;
+  }
+
+  return false;
 }

 type EnsureCodexSkillsInjectedOptions = {
@@ -79,7 +102,7 @@ export async function ensureCodexSkillsInjected(
  const skillsEntries = options.skillsEntries ?? await listPaperclipSkillEntries(__moduleDir);
  if (skillsEntries.length === 0) return;

-  const skillsHome = options.skillsHome ?? path.join(codexHomeDir(), "skills");
+  const skillsHome = options.skillsHome ?? path.join(resolveCodexHomeDir(process.env), "skills");
  await fs.mkdir(skillsHome, { recursive: true });
  const removedSkills = await removeMaintainerOnlySkillSymlinks(
    skillsHome,
@@ -96,6 +119,31 @@ export async function ensureCodexSkillsInjected(
    const target = path.join(skillsHome, entry.name);

    try {
+      const existing = await fs.lstat(target).catch(() => null);
+      if (existing?.isSymbolicLink()) {
+        const linkedPath = await fs.readlink(target).catch(() => null);
+        const resolvedLinkedPath = linkedPath
+          ? path.resolve(path.dirname(target), linkedPath)
+          : null;
+        if (
+          resolvedLinkedPath &&
+          resolvedLinkedPath !== entry.source &&
+          (await isLikelyPaperclipRuntimeSkillSource(resolvedLinkedPath, entry.name))
+        ) {
+          await fs.unlink(target);
+          if (linkSkill) {
+            await linkSkill(entry.source, target);
+          } else {
+            await fs.symlink(entry.source, target);
+          }
+          await onLog(
+            "stderr",
+            `[paperclip] Repaired Codex skill "${entry.name}" into ${skillsHome}\n`,
+          );
+          continue;
+        }
+      }
+
      const result = await ensurePaperclipSkillSymlink(entry.source, target, linkSkill);
      if (result === "skipped") continue;

@@ -160,12 +208,25 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
  const useConfiguredInsteadOfAgentHome = workspaceSource === "agent_home" && configuredCwd.length > 0;
  const effectiveWorkspaceCwd = useConfiguredInsteadOfAgentHome ? "" : workspaceCwd;
  const cwd = effectiveWorkspaceCwd || configuredCwd || process.cwd();
-  await ensureAbsoluteDirectory(cwd, { createIfMissing: true });
-  await ensureCodexSkillsInjected(onLog);
  const envConfig = parseObject(config.env);
+  const configuredCodexHome =
+    typeof envConfig.CODEX_HOME === "string" && envConfig.CODEX_HOME.trim().length > 0
+      ? path.resolve(envConfig.CODEX_HOME.trim())
+      : null;
+  await ensureAbsoluteDirectory(cwd, { createIfMissing: true });
+  const preparedWorktreeCodexHome =
+    configuredCodexHome ? null : await prepareWorktreeCodexHome(process.env, onLog);
+  const effectiveCodexHome = configuredCodexHome ?? preparedWorktreeCodexHome;
+  await ensureCodexSkillsInjected(
+    onLog,
+    effectiveCodexHome ? { skillsHome: path.join(effectiveCodexHome, "skills") } : {},
+  );
  const hasExplicitApiKey =
    typeof envConfig.PAPERCLIP_API_KEY === "string" && envConfig.PAPERCLIP_API_KEY.trim().length > 0;
  const env: Record<string, string> = { ...buildPaperclipEnv(agent) };
+  if (effectiveCodexHome) {
+    env.CODEX_HOME = effectiveCodexHome;
+  }
  env.PAPERCLIP_RUN_ID = runId;
  const wakeTaskId =
    (typeof context.taskId === "string" && context.taskId.trim().length > 0 && context.taskId.trim()) ||
@@ -278,6 +339,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
  const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
  const instructionsDir = instructionsFilePath ? `${path.dirname(instructionsFilePath)}/` : "";
  let instructionsPrefix = "";
+  let instructionsChars = 0;
  if (instructionsFilePath) {
    try {
      const instructionsContents = await fs.readFile(instructionsFilePath, "utf8");
@@ -285,6 +347,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        `${instructionsContents}\n\n` +
        `The above agent instructions were loaded from ${instructionsFilePath}. ` +
        `Resolve any relative file references from ${instructionsDir}.\n\n`;
+      instructionsChars = instructionsPrefix.length;
      await onLog(
        "stderr",
        `[paperclip] Loaded agent instructions file: ${instructionsFilePath}\n`,
@@ -309,7 +372,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
      `Configured instructionsFilePath ${instructionsFilePath}, but file could not be read; continuing without injected instructions.`,
    ];
  })();
-  const renderedPrompt = renderTemplate(promptTemplate, {
+  const bootstrapPromptTemplate = asString(config.bootstrapPromptTemplate, "");
+  const templateData = {
    agentId: agent.id,
    companyId: agent.companyId,
    runId,
@@ -317,8 +381,26 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    agent,
    run: { id: runId, source: "on_demand" },
    context,
-  });
-  const prompt = `${instructionsPrefix}${renderedPrompt}`;
+  };
+  const renderedPrompt = renderTemplate(promptTemplate, templateData);
+  const renderedBootstrapPrompt =
+    !sessionId && bootstrapPromptTemplate.trim().length > 0
+      ? renderTemplate(bootstrapPromptTemplate, templateData).trim()
+      : "";
+  const sessionHandoffNote = asString(context.paperclipSessionHandoffMarkdown, "").trim();
+  const prompt = joinPromptSections([
+    instructionsPrefix,
+    renderedBootstrapPrompt,
+    sessionHandoffNote,
+    renderedPrompt,
+  ]);
+  const promptMetrics = {
+    promptChars: prompt.length,
+    instructionsChars,
+    bootstrapPromptChars: renderedBootstrapPrompt.length,
+    sessionHandoffChars: sessionHandoffNote.length,
+    heartbeatPromptChars: renderedPrompt.length,
+  };

  const buildArgs = (resumeSessionId: string | null) => {
    const args = ["exec", "--json"];
@@ -346,6 +428,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        }),
        env: redactEnvForLogs(env),
        prompt,
+        promptMetrics,
        context,
      });
    }
--- a/packages/adapters/codex-local/src/ui/build-config.ts
+++ b/packages/adapters/codex-local/src/ui/build-config.ts
@@ -71,6 +71,7 @@ export function buildCodexLocalConfig(v: CreateConfigValues): Record<string, unk
  if (v.cwd) ac.cwd = v.cwd;
  if (v.instructionsFilePath) ac.instructionsFilePath = v.instructionsFilePath;
  if (v.promptTemplate) ac.promptTemplate = v.promptTemplate;
+  if (v.bootstrapPrompt) ac.bootstrapPromptTemplate = v.bootstrapPrompt;
  ac.model = v.model || DEFAULT_CODEX_LOCAL_MODEL;
  if (v.thinkingEffort) ac.modelReasoningEffort = v.thinkingEffort;
  ac.timeoutSec = 0;
--- a/packages/adapters/cursor-local/src/server/execute.ts
+++ b/packages/adapters/cursor-local/src/server/execute.ts
@@ -17,6 +17,7 @@ import {
  listPaperclipSkillEntries,
  removeMaintainerOnlySkillSymlinks,
  renderTemplate,
+  joinPromptSections,
  runChildProcess,
 } from "@paperclipai/adapter-utils/server-utils";
 import { DEFAULT_CURSOR_LOCAL_MODEL } from "../index.js";
@@ -268,6 +269,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
  const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
  const instructionsDir = instructionsFilePath ? `${path.dirname(instructionsFilePath)}/` : "";
  let instructionsPrefix = "";
+  let instructionsChars = 0;
  if (instructionsFilePath) {
    try {
      const instructionsContents = await fs.readFile(instructionsFilePath, "utf8");
@@ -275,6 +277,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        `${instructionsContents}\n\n` +
        `The above agent instructions were loaded from ${instructionsFilePath}. ` +
        `Resolve any relative file references from ${instructionsDir}.\n\n`;
+      instructionsChars = instructionsPrefix.length;
      await onLog(
        "stderr",
        `[paperclip] Loaded agent instructions file: ${instructionsFilePath}\n`,
@@ -307,7 +310,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    return notes;
  })();

-  const renderedPrompt = renderTemplate(promptTemplate, {
+  const bootstrapPromptTemplate = asString(config.bootstrapPromptTemplate, "");
+  const templateData = {
    agentId: agent.id,
    companyId: agent.companyId,
    runId,
@@ -315,9 +319,29 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    agent,
    run: { id: runId, source: "on_demand" },
    context,
-  });
+  };
+  const renderedPrompt = renderTemplate(promptTemplate, templateData);
+  const renderedBootstrapPrompt =
+    !sessionId && bootstrapPromptTemplate.trim().length > 0
+      ? renderTemplate(bootstrapPromptTemplate, templateData).trim()
+      : "";
+  const sessionHandoffNote = asString(context.paperclipSessionHandoffMarkdown, "").trim();
  const paperclipEnvNote = renderPaperclipEnvNote(env);
-  const prompt = `${instructionsPrefix}${paperclipEnvNote}${renderedPrompt}`;
+  const prompt = joinPromptSections([
+    instructionsPrefix,
+    renderedBootstrapPrompt,
+    sessionHandoffNote,
+    paperclipEnvNote,
+    renderedPrompt,
+  ]);
+  const promptMetrics = {
+    promptChars: prompt.length,
+    instructionsChars,
+    bootstrapPromptChars: renderedBootstrapPrompt.length,
+    sessionHandoffChars: sessionHandoffNote.length,
+    runtimeNoteChars: paperclipEnvNote.length,
+    heartbeatPromptChars: renderedPrompt.length,
+  };

  const buildArgs = (resumeSessionId: string | null) => {
    const args = ["-p", "--output-format", "stream-json", "--workspace", cwd];
@@ -340,6 +364,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        commandArgs: args,
        env: redactEnvForLogs(env),
        prompt,
+        promptMetrics,
        context,
      });
    }
--- a/packages/adapters/cursor-local/src/ui/build-config.ts
+++ b/packages/adapters/cursor-local/src/ui/build-config.ts
@@ -62,6 +62,7 @@ export function buildCursorLocalConfig(v: CreateConfigValues): Record<string, un
  if (v.cwd) ac.cwd = v.cwd;
  if (v.instructionsFilePath) ac.instructionsFilePath = v.instructionsFilePath;
  if (v.promptTemplate) ac.promptTemplate = v.promptTemplate;
+  if (v.bootstrapPrompt) ac.bootstrapPromptTemplate = v.bootstrapPrompt;
  ac.model = v.model || DEFAULT_CURSOR_LOCAL_MODEL;
  const mode = normalizeMode(v.thinkingEffort);
  if (mode) ac.mode = mode;
--- a/packages/adapters/gemini-local/src/server/execute.ts
+++ b/packages/adapters/gemini-local/src/server/execute.ts
@@ -13,6 +13,7 @@ import {
  ensureAbsoluteDirectory,
  ensureCommandResolvable,
  ensurePaperclipSkillSymlink,
+  joinPromptSections,
  ensurePathInEnv,
  listPaperclipSkillEntries,
  removeMaintainerOnlySkillSymlinks,
@@ -268,7 +269,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    return notes;
  })();

-  const renderedPrompt = renderTemplate(promptTemplate, {
+  const bootstrapPromptTemplate = asString(config.bootstrapPromptTemplate, "");
+  const templateData = {
    agentId: agent.id,
    companyId: agent.companyId,
    runId,
@@ -276,10 +278,31 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    agent,
    run: { id: runId, source: "on_demand" },
    context,
-  });
+  };
+  const renderedPrompt = renderTemplate(promptTemplate, templateData);
+  const renderedBootstrapPrompt =
+    !sessionId && bootstrapPromptTemplate.trim().length > 0
+      ? renderTemplate(bootstrapPromptTemplate, templateData).trim()
+      : "";
+  const sessionHandoffNote = asString(context.paperclipSessionHandoffMarkdown, "").trim();
  const paperclipEnvNote = renderPaperclipEnvNote(env);
  const apiAccessNote = renderApiAccessNote(env);
-  const prompt = `${instructionsPrefix}${paperclipEnvNote}${apiAccessNote}${renderedPrompt}`;
+  const prompt = joinPromptSections([
+    instructionsPrefix,
+    renderedBootstrapPrompt,
+    sessionHandoffNote,
+    paperclipEnvNote,
+    apiAccessNote,
+    renderedPrompt,
+  ]);
+  const promptMetrics = {
+    promptChars: prompt.length,
+    instructionsChars: instructionsPrefix.length,
+    bootstrapPromptChars: renderedBootstrapPrompt.length,
+    sessionHandoffChars: sessionHandoffNote.length,
+    runtimeNoteChars: paperclipEnvNote.length + apiAccessNote.length,
+    heartbeatPromptChars: renderedPrompt.length,
+  };

  const buildArgs = (resumeSessionId: string | null) => {
    const args = ["--output-format", "stream-json"];
@@ -309,6 +332,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        )),
        env: redactEnvForLogs(env),
        prompt,
+        promptMetrics,
        context,
      });
    }
--- a/packages/adapters/gemini-local/src/ui/build-config.ts
+++ b/packages/adapters/gemini-local/src/ui/build-config.ts
@@ -56,6 +56,7 @@ export function buildGeminiLocalConfig(v: CreateConfigValues): Record<string, un
  if (v.cwd) ac.cwd = v.cwd;
  if (v.instructionsFilePath) ac.instructionsFilePath = v.instructionsFilePath;
  if (v.promptTemplate) ac.promptTemplate = v.promptTemplate;
+  if (v.bootstrapPrompt) ac.bootstrapPromptTemplate = v.bootstrapPrompt;
  ac.model = v.model || DEFAULT_GEMINI_LOCAL_MODEL;
  ac.timeoutSec = 0;
  ac.graceSec = 15;
--- a/packages/adapters/opencode-local/src/server/execute.ts
+++ b/packages/adapters/opencode-local/src/server/execute.ts
@@ -9,6 +9,7 @@ import {
  asStringArray,
  parseObject,
  buildPaperclipEnv,
+  joinPromptSections,
  redactEnvForLogs,
  ensureAbsoluteDirectory,
  ensureCommandResolvable,
@@ -233,7 +234,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    ];
  })();

-  const renderedPrompt = renderTemplate(promptTemplate, {
+  const bootstrapPromptTemplate = asString(config.bootstrapPromptTemplate, "");
+  const templateData = {
    agentId: agent.id,
    companyId: agent.companyId,
    runId,
@@ -241,8 +243,26 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    agent,
    run: { id: runId, source: "on_demand" },
    context,
-  });
-  const prompt = `${instructionsPrefix}${renderedPrompt}`;
+  };
+  const renderedPrompt = renderTemplate(promptTemplate, templateData);
+  const renderedBootstrapPrompt =
+    !sessionId && bootstrapPromptTemplate.trim().length > 0
+      ? renderTemplate(bootstrapPromptTemplate, templateData).trim()
+      : "";
+  const sessionHandoffNote = asString(context.paperclipSessionHandoffMarkdown, "").trim();
+  const prompt = joinPromptSections([
+    instructionsPrefix,
+    renderedBootstrapPrompt,
+    sessionHandoffNote,
+    renderedPrompt,
+  ]);
+  const promptMetrics = {
+    promptChars: prompt.length,
+    instructionsChars: instructionsPrefix.length,
+    bootstrapPromptChars: renderedBootstrapPrompt.length,
+    sessionHandoffChars: sessionHandoffNote.length,
+    heartbeatPromptChars: renderedPrompt.length,
+  };

  const buildArgs = (resumeSessionId: string | null) => {
    const args = ["run", "--format", "json"];
@@ -264,6 +284,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        commandArgs: [...args, `<stdin prompt ${prompt.length} chars>`],
        env: redactEnvForLogs(env),
        prompt,
+        promptMetrics,
        context,
      });
    }
--- a/packages/adapters/opencode-local/src/ui/build-config.ts
+++ b/packages/adapters/opencode-local/src/ui/build-config.ts
@@ -55,6 +55,7 @@ export function buildOpenCodeLocalConfig(v: CreateConfigValues): Record<string,
  if (v.cwd) ac.cwd = v.cwd;
  if (v.instructionsFilePath) ac.instructionsFilePath = v.instructionsFilePath;
  if (v.promptTemplate) ac.promptTemplate = v.promptTemplate;
+  if (v.bootstrapPrompt) ac.bootstrapPromptTemplate = v.bootstrapPrompt;
  if (v.model) ac.model = v.model;
  if (v.thinkingEffort) ac.variant = v.thinkingEffort;
  // OpenCode sessions can run until the CLI exits naturally; keep timeout disabled (0)
--- a/packages/adapters/pi-local/src/server/execute.ts
+++ b/packages/adapters/pi-local/src/server/execute.ts
@@ -9,6 +9,7 @@ import {
  asStringArray,
  parseObject,
  buildPaperclipEnv,
+  joinPromptSections,
  redactEnvForLogs,
  ensureAbsoluteDirectory,
  ensureCommandResolvable,
@@ -270,7 +271,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    systemPromptExtension = promptTemplate;
  }

-  const renderedSystemPromptExtension = renderTemplate(systemPromptExtension, {
+  const bootstrapPromptTemplate = asString(config.bootstrapPromptTemplate, "");
+  const templateData = {
    agentId: agent.id,
    companyId: agent.companyId,
    runId,
@@ -278,18 +280,26 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    agent,
    run: { id: runId, source: "on_demand" },
    context,
-  });
-
-  // User prompt is simple - just the rendered prompt template without instructions
-  const userPrompt = renderTemplate(promptTemplate, {
-    agentId: agent.id,
-    companyId: agent.companyId,
-    runId,
-    company: { id: agent.companyId },
-    agent,
-    run: { id: runId, source: "on_demand" },
-    context,
-  });
+  };
+  const renderedSystemPromptExtension = renderTemplate(systemPromptExtension, templateData);
+  const renderedHeartbeatPrompt = renderTemplate(promptTemplate, templateData);
+  const renderedBootstrapPrompt =
+    !canResumeSession && bootstrapPromptTemplate.trim().length > 0
+      ? renderTemplate(bootstrapPromptTemplate, templateData).trim()
+      : "";
+  const sessionHandoffNote = asString(context.paperclipSessionHandoffMarkdown, "").trim();
+  const userPrompt = joinPromptSections([
+    renderedBootstrapPrompt,
+    sessionHandoffNote,
+    renderedHeartbeatPrompt,
+  ]);
+  const promptMetrics = {
+    systemPromptChars: renderedSystemPromptExtension.length,
+    promptChars: userPrompt.length,
+    bootstrapPromptChars: renderedBootstrapPrompt.length,
+    sessionHandoffChars: sessionHandoffNote.length,
+    heartbeatPromptChars: renderedHeartbeatPrompt.length,
+  };

  const commandNotes = (() => {
    if (!resolvedInstructionsFilePath) return [] as string[];
@@ -345,6 +355,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        commandArgs: args,
        env: redactEnvForLogs(env),
        prompt: userPrompt,
+        promptMetrics,
        context,
      });
    }
--- a/packages/adapters/pi-local/src/ui/build-config.ts
+++ b/packages/adapters/pi-local/src/ui/build-config.ts
@@ -48,6 +48,7 @@ export function buildPiLocalConfig(v: CreateConfigValues): Record<string, unknow
  if (v.cwd) ac.cwd = v.cwd;
  if (v.instructionsFilePath) ac.instructionsFilePath = v.instructionsFilePath;
  if (v.promptTemplate) ac.promptTemplate = v.promptTemplate;
+  if (v.bootstrapPrompt) ac.bootstrapPromptTemplate = v.bootstrapPrompt;
  if (v.model) ac.model = v.model;
  if (v.thinkingEffort) ac.thinking = v.thinkingEffort;
  
--- a/packages/shared/src/validators/agent.ts
+++ b/packages/shared/src/validators/agent.ts
@@ -78,6 +78,10 @@ export const wakeAgentSchema = z.object({
  reason: z.string().optional().nullable(),
  payload: z.record(z.unknown()).optional().nullable(),
  idempotencyKey: z.string().optional().nullable(),
+  forceFreshSession: z.preprocess(
+    (value) => (value === null ? undefined : value),
+    z.boolean().optional().default(false),
+  ),
 });

 export type WakeAgent = z.infer<typeof wakeAgentSchema>;
--- a/report/2026-03-13-08-46-token-optimization-implementation.md
+++ b/report/2026-03-13-08-46-token-optimization-implementation.md
@@ -0,0 +1,48 @@
+# Token Optimization Implementation Report
+
+Implemented the token-optimization plan across heartbeat orchestration, issue context APIs, adapter prompt construction, skill exposure, and agent configuration UX.
+
+The main behavior changes are:
+
+- Heartbeat telemetry now normalizes usage reported by session-based local adapters into per-run deltas instead of blindly trusting cumulative session totals.
+- Timer and manual wakes now preserve task sessions by default; fresh sessions are forced only for explicit `forceFreshSession` wakes or new issue assignment wakes.
+- Heartbeat session rotation is now policy-driven in the control plane, with a handoff note injected when a session is compacted and restarted.
+- Paperclip issue context now has incremental APIs: `GET /api/agents/me/inbox-lite`, `GET /api/issues/:id/heartbeat-context`, and comment delta queries via `GET /api/issues/:id/comments?after=...&order=asc`.
+- The `paperclip` skill now teaches agents to use those compact/incremental APIs first, while keeping full-thread fetches as a cold-start fallback.
+- All local adapters now separate first-session bootstrap prompts from per-heartbeat prompt templates, and emit prompt size metrics (`promptMetrics`) in invocation metadata.
+- Adapter create flows now persist `bootstrapPromptTemplate` correctly.
+- The agent config UI now explains the difference between bootstrap prompts and heartbeat prompts and warns about prompt churn.
+- Runtime skill defaults now include `paperclip`, `para-memory-files`, and `paperclip-create-agent`. `create-agent-adapter` was moved to `.agents/skills/create-agent-adapter`.
+
+Important follow-up finding from real-run review:
+
+- `codex_local` currently injects Paperclip skills into the shared Codex skills home (`$CODEX_HOME/skills` or `~/.codex/skills`) rather than mounting a worktree-local skill directory.
+- If a Paperclip-owned skill symlink already points at another live checkout, the adapter currently skips it instead of repointing it.
+- In practice, this means a worktree can contain newer `skills/paperclip/SKILL.md` guidance while Codex still follows an older checkout's skill content.
+- That likely explains why PAP-507 still showed full issue/comment reload behavior even though the incremental context work was already implemented in this branch.
+- This should be treated as a separate follow-up item for `codex_local` skill isolation or symlink repair.
+
+Files with the most important implementation work:
+
+- `server/src/services/heartbeat.ts`
+- `server/src/services/issues.ts`
+- `server/src/routes/issues.ts`
+- `server/src/routes/agents.ts`
+- `server/src/routes/access.ts`
+- `skills/paperclip/SKILL.md`
+- `packages/adapters/*/src/server/execute.ts`
+- `packages/adapters/*/src/ui/build-config.ts`
+- `ui/src/components/AgentConfigForm.tsx`
+
+Verification completed successfully:
+
+- `pnpm -r typecheck`
+- `pnpm test:run`
+- `pnpm build`
+
+While verifying, I also fixed two existing embedded-postgres typing mismatches so repo-wide `typecheck` and `build` pass again:
+
+- `packages/db/src/migration-runtime.ts`
+- `cli/src/commands/worktree.ts`
+
+The next useful follow-up is to measure the before/after effect in real runs, now that telemetry is less misleading and prompt/session reuse behavior is consistent across adapters.
--- a/server/src/tests/codex-local-execute.test.ts
+++ b/server/src/tests/codex-local-execute.test.ts
@@ -0,0 +1,208 @@
+import { describe, expect, it } from "vitest";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { execute } from "@paperclipai/adapter-codex-local/server";
+
+async function writeFakeCodexCommand(commandPath: string): Promise<void> { // Writes an executable Node stub at commandPath that stands in for the real codex CLI.
+  const script = `#!/usr/bin/env node
+const fs = require("node:fs");
+
+const capturePath = process.env.PAPERCLIP_TEST_CAPTURE_PATH;
+const payload = {
+  argv: process.argv.slice(2),
+  prompt: fs.readFileSync(0, "utf8"),
+  codexHome: process.env.CODEX_HOME || null,
+  paperclipEnvKeys: Object.keys(process.env)
+    .filter((key) => key.startsWith("PAPERCLIP_"))
+    .sort(),
+};
+if (capturePath) {
+  fs.writeFileSync(capturePath, JSON.stringify(payload), "utf8");
+}
+console.log(JSON.stringify({ type: "thread.started", thread_id: "codex-session-1" }));
+console.log(JSON.stringify({ type: "item.completed", item: { type: "agent_message", text: "hello" } }));
+console.log(JSON.stringify({ type: "turn.completed", usage: { input_tokens: 1, cached_input_tokens: 0, output_tokens: 1 } }));
+`;
+  await fs.writeFile(commandPath, script, "utf8"); // the stub records argv, the stdin prompt, CODEX_HOME and PAPERCLIP_* env key names to PAPERCLIP_TEST_CAPTURE_PATH when set, then prints three JSON event lines
+  await fs.chmod(commandPath, 0o755); // must be executable: the tests pass this path as the adapter's `command`
+}
+
+type CapturePayload = { // Shape of the JSON the fake codex script writes to the capture file.
+  argv: string[]; // process.argv.slice(2) as seen by the stub
+  prompt: string; // everything the stub read from stdin
+  codexHome: string | null; // CODEX_HOME in the stub's environment, or null when unset or empty
+  paperclipEnvKeys: string[]; // sorted names of env vars that start with PAPERCLIP_
+};
+
+describe("codex execute", () => {
+  it("uses a worktree-isolated CODEX_HOME while preserving shared auth and config", async () => { // Runs the adapter in worktree mode and checks which CODEX_HOME the spawned command sees.
+    const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-"));
+    const workspace = path.join(root, "workspace");
+    const commandPath = path.join(root, "codex");
+    const capturePath = path.join(root, "capture.json");
+    const sharedCodexHome = path.join(root, "shared-codex-home");
+    const paperclipHome = path.join(root, "paperclip-home");
+    const isolatedCodexHome = path.join(paperclipHome, "instances", "worktree-1", "codex-home"); // expected location: <PAPERCLIP_HOME>/instances/<PAPERCLIP_INSTANCE_ID>/codex-home
+    await fs.mkdir(workspace, { recursive: true });
+    await fs.mkdir(sharedCodexHome, { recursive: true });
+    await fs.writeFile(path.join(sharedCodexHome, "auth.json"), '{"token":"shared"}\n', "utf8");
+    await fs.writeFile(path.join(sharedCodexHome, "config.toml"), 'model = "codex-mini-latest"\n', "utf8");
+    await writeFakeCodexCommand(commandPath);
+
+    const previousHome = process.env.HOME; // snapshot every env var mutated below so the finally block can restore it
+    const previousPaperclipHome = process.env.PAPERCLIP_HOME;
+    const previousPaperclipInstanceId = process.env.PAPERCLIP_INSTANCE_ID;
+    const previousPaperclipInWorktree = process.env.PAPERCLIP_IN_WORKTREE;
+    const previousCodexHome = process.env.CODEX_HOME;
+    process.env.HOME = root;
+    process.env.PAPERCLIP_HOME = paperclipHome;
+    process.env.PAPERCLIP_INSTANCE_ID = "worktree-1";
+    process.env.PAPERCLIP_IN_WORKTREE = "true";
+    process.env.CODEX_HOME = sharedCodexHome; // the shared home whose auth.json and config.toml are checked against the isolated home below
+
+    try {
+      const result = await execute({
+        runId: "run-1",
+        agent: {
+          id: "agent-1",
+          companyId: "company-1",
+          name: "Codex Coder",
+          adapterType: "codex_local",
+          adapterConfig: {},
+        },
+        runtime: {
+          sessionId: null,
+          sessionParams: null,
+          sessionDisplayId: null,
+          taskKey: null,
+        },
+        config: {
+          command: commandPath,
+          cwd: workspace,
+          env: {
+            PAPERCLIP_TEST_CAPTURE_PATH: capturePath,
+          },
+          promptTemplate: "Follow the paperclip heartbeat.",
+        },
+        context: {},
+        authToken: "run-jwt-token",
+        onLog: async () => {},
+      });
+
+      expect(result.exitCode).toBe(0);
+      expect(result.errorMessage).toBeNull();
+
+      const capture = JSON.parse(await fs.readFile(capturePath, "utf8")) as CapturePayload;
+      expect(capture.codexHome).toBe(isolatedCodexHome); // the child must see the per-instance home, not the shared one
+      expect(capture.argv).toEqual(expect.arrayContaining(["exec", "--json", "-"]));
+      expect(capture.prompt).toContain("Follow the paperclip heartbeat.");
+      expect(capture.paperclipEnvKeys).toEqual(
+        expect.arrayContaining([
+          "PAPERCLIP_AGENT_ID",
+          "PAPERCLIP_API_KEY",
+          "PAPERCLIP_API_URL",
+          "PAPERCLIP_COMPANY_ID",
+          "PAPERCLIP_RUN_ID",
+        ]),
+      );
+
+      const isolatedAuth = path.join(isolatedCodexHome, "auth.json");
+      const isolatedConfig = path.join(isolatedCodexHome, "config.toml");
+      const isolatedSkill = path.join(isolatedCodexHome, "skills", "paperclip");
+
+      expect((await fs.lstat(isolatedAuth)).isSymbolicLink()).toBe(true); // auth.json is expected to be a symlink resolving to the shared file
+      expect(await fs.realpath(isolatedAuth)).toBe(await fs.realpath(path.join(sharedCodexHome, "auth.json")));
+      expect((await fs.lstat(isolatedConfig)).isFile()).toBe(true); // config.toml is expected to be a regular file holding the shared contents
+      expect(await fs.readFile(isolatedConfig, "utf8")).toBe('model = "codex-mini-latest"\n');
+      expect((await fs.lstat(isolatedSkill)).isSymbolicLink()).toBe(true);
+    } finally {
+      if (previousHome === undefined) delete process.env.HOME; // restore each variable, deleting those that were originally unset
+      else process.env.HOME = previousHome;
+      if (previousPaperclipHome === undefined) delete process.env.PAPERCLIP_HOME;
+      else process.env.PAPERCLIP_HOME = previousPaperclipHome;
+      if (previousPaperclipInstanceId === undefined) delete process.env.PAPERCLIP_INSTANCE_ID;
+      else process.env.PAPERCLIP_INSTANCE_ID = previousPaperclipInstanceId;
+      if (previousPaperclipInWorktree === undefined) delete process.env.PAPERCLIP_IN_WORKTREE;
+      else process.env.PAPERCLIP_IN_WORKTREE = previousPaperclipInWorktree;
+      if (previousCodexHome === undefined) delete process.env.CODEX_HOME;
+      else process.env.CODEX_HOME = previousCodexHome;
+      await fs.rm(root, { recursive: true, force: true });
+    }
+  });
+
+  it("respects an explicit CODEX_HOME config override even in worktree mode", async () => { // Same worktree environment as the previous test, but the adapter config env sets CODEX_HOME itself.
+    const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-explicit-"));
+    const workspace = path.join(root, "workspace");
+    const commandPath = path.join(root, "codex");
+    const capturePath = path.join(root, "capture.json");
+    const sharedCodexHome = path.join(root, "shared-codex-home");
+    const explicitCodexHome = path.join(root, "explicit-codex-home");
+    const paperclipHome = path.join(root, "paperclip-home");
+    await fs.mkdir(workspace, { recursive: true });
+    await fs.mkdir(sharedCodexHome, { recursive: true });
+    await fs.writeFile(path.join(sharedCodexHome, "auth.json"), '{"token":"shared"}\n', "utf8");
+    await writeFakeCodexCommand(commandPath);
+
+    const previousHome = process.env.HOME; // snapshot every env var mutated below so the finally block can restore it
+    const previousPaperclipHome = process.env.PAPERCLIP_HOME;
+    const previousPaperclipInstanceId = process.env.PAPERCLIP_INSTANCE_ID;
+    const previousPaperclipInWorktree = process.env.PAPERCLIP_IN_WORKTREE;
+    const previousCodexHome = process.env.CODEX_HOME;
+    process.env.HOME = root;
+    process.env.PAPERCLIP_HOME = paperclipHome;
+    process.env.PAPERCLIP_INSTANCE_ID = "worktree-1";
+    process.env.PAPERCLIP_IN_WORKTREE = "true";
+    process.env.CODEX_HOME = sharedCodexHome;
+
+    try {
+      const result = await execute({
+        runId: "run-2",
+        agent: {
+          id: "agent-1",
+          companyId: "company-1",
+          name: "Codex Coder",
+          adapterType: "codex_local",
+          adapterConfig: {},
+        },
+        runtime: {
+          sessionId: null,
+          sessionParams: null,
+          sessionDisplayId: null,
+          taskKey: null,
+        },
+        config: {
+          command: commandPath,
+          cwd: workspace,
+          env: {
+            PAPERCLIP_TEST_CAPTURE_PATH: capturePath,
+            CODEX_HOME: explicitCodexHome, // the explicit override under test
+          },
+          promptTemplate: "Follow the paperclip heartbeat.",
+        },
+        context: {},
+        authToken: "run-jwt-token",
+        onLog: async () => {},
+      });
+
+      expect(result.exitCode).toBe(0);
+      expect(result.errorMessage).toBeNull();
+
+      const capture = JSON.parse(await fs.readFile(capturePath, "utf8")) as CapturePayload;
+      expect(capture.codexHome).toBe(explicitCodexHome); // the configured value wins over both the process env and worktree isolation
+      await expect(fs.lstat(path.join(paperclipHome, "instances", "worktree-1", "codex-home"))).rejects.toThrow(); // the per-instance isolated home must not have been created
+    } finally {
+      if (previousHome === undefined) delete process.env.HOME; // restore each variable, deleting those that were originally unset
+      else process.env.HOME = previousHome;
+      if (previousPaperclipHome === undefined) delete process.env.PAPERCLIP_HOME;
+      else process.env.PAPERCLIP_HOME = previousPaperclipHome;
+      if (previousPaperclipInstanceId === undefined) delete process.env.PAPERCLIP_INSTANCE_ID;
+      else process.env.PAPERCLIP_INSTANCE_ID = previousPaperclipInstanceId;
+      if (previousPaperclipInWorktree === undefined) delete process.env.PAPERCLIP_IN_WORKTREE;
+      else process.env.PAPERCLIP_IN_WORKTREE = previousPaperclipInWorktree;
+      if (previousCodexHome === undefined) delete process.env.CODEX_HOME;
+      else process.env.CODEX_HOME = previousCodexHome;
+      await fs.rm(root, { recursive: true, force: true });
+    }
+  });
+});
--- a/server/src/tests/codex-local-skill-injection.test.ts
+++ b/server/src/tests/codex-local-skill-injection.test.ts
@@ -0,0 +1,91 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+import { ensureCodexSkillsInjected } from "@paperclipai/adapter-codex-local/server";
+
+async function makeTempDir(prefix: string): Promise<string> { // Creates a uniquely named directory under os.tmpdir() whose name starts with prefix and resolves to its path.
+  return fs.mkdtemp(path.join(os.tmpdir(), prefix));
+}
+
+async function createPaperclipRepoSkill(root: string, skillName: string) { // Lays out a minimal fake Paperclip checkout under root containing skills/<skillName>/SKILL.md.
+  await fs.mkdir(path.join(root, "server"), { recursive: true }); // NOTE(review): server/, packages/adapter-utils, pnpm-workspace.yaml and package.json are presumably the markers the adapter uses to recognise a Paperclip checkout — confirm against ensureCodexSkillsInjected
+  await fs.mkdir(path.join(root, "packages", "adapter-utils"), { recursive: true });
+  await fs.mkdir(path.join(root, "skills", skillName), { recursive: true });
+  await fs.writeFile(path.join(root, "pnpm-workspace.yaml"), "packages:\n  - packages/*\n", "utf8");
+  await fs.writeFile(path.join(root, "package.json"), '{"name":"paperclip"}\n', "utf8");
+  await fs.writeFile(
+    path.join(root, "skills", skillName, "SKILL.md"),
+    `---\nname: ${skillName}\n---\n`, // front matter only, carrying the skill name
+    "utf8",
+  );
+}
+
+async function createCustomSkill(root: string, skillName: string) { // Writes custom/<skillName>/SKILL.md under root, with none of the repo files createPaperclipRepoSkill adds.
+  await fs.mkdir(path.join(root, "custom", skillName), { recursive: true });
+  await fs.writeFile(
+    path.join(root, "custom", skillName, "SKILL.md"),
+    `---\nname: ${skillName}\n---\n`, // same front matter shape as the repo skill fixture
+    "utf8",
+  );
+}
+
+describe("codex local adapter skill injection", () => { // Covers how ensureCodexSkillsInjected treats a pre-existing symlink in the skills home.
+  const cleanupDirs = new Set<string>(); // temp dirs registered by each test; removed in afterEach
+
+  afterEach(async () => {
+    await Promise.all(Array.from(cleanupDirs).map((dir) => fs.rm(dir, { recursive: true, force: true })));
+    cleanupDirs.clear();
+  });
+
+  it("repairs a Codex Paperclip skill symlink that still points at another live checkout", async () => {
+    const currentRepo = await makeTempDir("paperclip-codex-current-");
+    const oldRepo = await makeTempDir("paperclip-codex-old-");
+    const skillsHome = await makeTempDir("paperclip-codex-home-");
+    cleanupDirs.add(currentRepo);
+    cleanupDirs.add(oldRepo);
+    cleanupDirs.add(skillsHome);
+
+    await createPaperclipRepoSkill(currentRepo, "paperclip");
+    await createPaperclipRepoSkill(oldRepo, "paperclip");
+    await fs.symlink(path.join(oldRepo, "skills", "paperclip"), path.join(skillsHome, "paperclip")); // pre-existing link into the other checkout, which still exists on disk
+
+    const logs: string[] = [];
+    await ensureCodexSkillsInjected(
+      async (_stream, chunk) => {
+        logs.push(chunk); // collect log chunks so the repair message can be asserted
+      },
+      {
+        skillsHome,
+        skillsEntries: [{ name: "paperclip", source: path.join(currentRepo, "skills", "paperclip") }],
+      },
+    );
+
+    expect(await fs.realpath(path.join(skillsHome, "paperclip"))).toBe( // the link must now resolve into the current checkout
+      await fs.realpath(path.join(currentRepo, "skills", "paperclip")),
+    );
+    expect(logs.some((line) => line.includes('Repaired Codex skill "paperclip"'))).toBe(true);
+  });
+
+  it("preserves a custom Codex skill symlink outside Paperclip repo checkouts", async () => {
+    const currentRepo = await makeTempDir("paperclip-codex-current-");
+    const customRoot = await makeTempDir("paperclip-codex-custom-");
+    const skillsHome = await makeTempDir("paperclip-codex-home-");
+    cleanupDirs.add(currentRepo);
+    cleanupDirs.add(customRoot);
+    cleanupDirs.add(skillsHome);
+
+    await createPaperclipRepoSkill(currentRepo, "paperclip");
+    await createCustomSkill(customRoot, "paperclip");
+    await fs.symlink(path.join(customRoot, "custom", "paperclip"), path.join(skillsHome, "paperclip")); // pre-existing link to a skill that does not live in a Paperclip checkout layout
+
+    await ensureCodexSkillsInjected(async () => {}, {
+      skillsHome,
+      skillsEntries: [{ name: "paperclip", source: path.join(currentRepo, "skills", "paperclip") }],
+    });
+
+    expect(await fs.realpath(path.join(skillsHome, "paperclip"))).toBe( // the link must still resolve to the custom skill
+      await fs.realpath(path.join(customRoot, "custom", "paperclip")),
+    );
+  });
+});
--- a/server/src/tests/heartbeat-workspace-session.test.ts
+++ b/server/src/tests/heartbeat-workspace-session.test.ts
@@ -93,16 +93,26 @@ describe("shouldResetTaskSessionForWake", () => {
    expect(shouldResetTaskSessionForWake({ wakeReason: "issue_assigned" })).toBe(true);
  });

-  it("resets session context on timer heartbeats", () => {
-    expect(shouldResetTaskSessionForWake({ wakeSource: "timer" })).toBe(true);
+  it("preserves session context on timer heartbeats", () => {
+    expect(shouldResetTaskSessionForWake({ wakeSource: "timer" })).toBe(false);
  });

-  it("resets session context on manual on-demand invokes", () => {
+  it("preserves session context on manual on-demand invokes by default", () => {
    expect(
      shouldResetTaskSessionForWake({
        wakeSource: "on_demand",
        wakeTriggerDetail: "manual",
      }),
+    ).toBe(false);
+  });
+
+  it("resets session context when a fresh session is explicitly requested", () => {
+    expect(
+      shouldResetTaskSessionForWake({
+        wakeSource: "on_demand",
+        wakeTriggerDetail: "manual",
+        forceFreshSession: true,
+      }),
    ).toBe(true);
  });

--- a/server/src/tests/ui-branding.test.ts
+++ b/server/src/tests/ui-branding.test.ts
@@ -1,8 +1,16 @@
 import { describe, expect, it } from "vitest";
-import { applyUiBranding, isWorktreeUiBrandingEnabled, renderFaviconLinks } from "../ui-branding.js";
+import {
+  applyUiBranding,
+  getWorktreeUiBranding,
+  isWorktreeUiBrandingEnabled,
+  renderFaviconLinks,
+  renderRuntimeBrandingMeta,
+} from "../ui-branding.js";

 const TEMPLATE = `<!doctype html>
 <head>
+    <!-- PAPERCLIP_RUNTIME_BRANDING_START -->
+    <!-- PAPERCLIP_RUNTIME_BRANDING_END -->
    <!-- PAPERCLIP_FAVICON_START -->
    <link rel="icon" href="/favicon.ico" sizes="48x48" />
    <link rel="icon" href="/favicon.svg" type="image/svg+xml" />
@@ -18,21 +26,57 @@ describe("ui branding", () => {
    expect(isWorktreeUiBrandingEnabled({ PAPERCLIP_IN_WORKTREE: "false" })).toBe(false);
  });

-  it("renders the worktree favicon asset set when enabled", () => {
-    const links = renderFaviconLinks(true);
-    expect(links).toContain("/worktree-favicon.ico");
-    expect(links).toContain("/worktree-favicon.svg");
-    expect(links).toContain("/worktree-favicon-32x32.png");
-    expect(links).toContain("/worktree-favicon-16x16.png");
+  it("resolves name, color, and text color for worktree branding", () => {
+    const branding = getWorktreeUiBranding({
+      PAPERCLIP_IN_WORKTREE: "true",
+      PAPERCLIP_WORKTREE_NAME: "paperclip-pr-432",
+      PAPERCLIP_WORKTREE_COLOR: "#4f86f7",
+    });
+
+    expect(branding.enabled).toBe(true);
+    expect(branding.name).toBe("paperclip-pr-432");
+    expect(branding.color).toBe("#4f86f7");
+    expect(branding.textColor).toMatch(/^#[0-9a-f]{6}$/);
+    expect(branding.faviconHref).toContain("data:image/svg+xml,");
  });

-  it("rewrites the favicon block for worktree instances only", () => {
-    const branded = applyUiBranding(TEMPLATE, { PAPERCLIP_IN_WORKTREE: "true" });
-    expect(branded).toContain("/worktree-favicon.svg");
+  it("renders a dynamic worktree favicon when enabled", () => {
+    const links = renderFaviconLinks(
+      getWorktreeUiBranding({
+        PAPERCLIP_IN_WORKTREE: "true",
+        PAPERCLIP_WORKTREE_NAME: "paperclip-pr-432",
+        PAPERCLIP_WORKTREE_COLOR: "#4f86f7",
+      }),
+    );
+    expect(links).toContain("data:image/svg+xml,");
+    expect(links).toContain('rel="shortcut icon"');
+  });
+
+  it("renders runtime branding metadata for the ui", () => {
+    const meta = renderRuntimeBrandingMeta(
+      getWorktreeUiBranding({
+        PAPERCLIP_IN_WORKTREE: "true",
+        PAPERCLIP_WORKTREE_NAME: "paperclip-pr-432",
+        PAPERCLIP_WORKTREE_COLOR: "#4f86f7",
+      }),
+    );
+    expect(meta).toContain('name="paperclip-worktree-name"');
+    expect(meta).toContain('content="paperclip-pr-432"');
+    expect(meta).toContain('name="paperclip-worktree-color"');
+  });
+
+  it("rewrites the favicon and runtime branding blocks for worktree instances only", () => {
+    const branded = applyUiBranding(TEMPLATE, {
+      PAPERCLIP_IN_WORKTREE: "true",
+      PAPERCLIP_WORKTREE_NAME: "paperclip-pr-432",
+      PAPERCLIP_WORKTREE_COLOR: "#4f86f7",
+    });
+    expect(branded).toContain("data:image/svg+xml,");
+    expect(branded).toContain('name="paperclip-worktree-name"');
    expect(branded).not.toContain('href="/favicon.svg"');

    const defaultHtml = applyUiBranding(TEMPLATE, {});
    expect(defaultHtml).toContain('href="/favicon.svg"');
-    expect(defaultHtml).not.toContain("/worktree-favicon.svg");
+    expect(defaultHtml).not.toContain('name="paperclip-worktree-name"');
  });
 });
--- a/server/src/routes/access.ts
+++ b/server/src/routes/access.ts
@@ -97,7 +97,11 @@ function requestBaseUrl(req: Request) {

 function readSkillMarkdown(skillName: string): string | null {
  const normalized = skillName.trim().toLowerCase();
-  if (normalized !== "paperclip" && normalized !== "paperclip-create-agent")
+  if (
+    normalized !== "paperclip" &&
+    normalized !== "paperclip-create-agent" &&
+    normalized !== "para-memory-files"
+  )
    return null;
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const candidates = [
@@ -1610,6 +1614,10 @@ export function accessRoutes(
    res.json({
      skills: [
        { name: "paperclip", path: "/api/skills/paperclip" },
+        {
+          name: "para-memory-files",
+          path: "/api/skills/para-memory-files"
+        },
        {
          name: "paperclip-create-agent",
          path: "/api/skills/paperclip-create-agent"
--- a/server/src/routes/agents.ts
+++ b/server/src/routes/agents.ts
@@ -575,6 +575,34 @@ export function agentRoutes(db: Db) {
    res.json({ ...agent, chainOfCommand });
  });

+  router.get("/agents/me/inbox-lite", async (req, res) => { // Agent-only endpoint: returns a trimmed projection of the issues assigned to the calling agent.
+    if (req.actor.type !== "agent" || !req.actor.agentId || !req.actor.companyId) { // requires an agent actor carrying both agentId and companyId; anything else gets 401
+      res.status(401).json({ error: "Agent authentication required" });
+      return;
+    }
+
+    const issuesSvc = issueService(db);
+    const rows = await issuesSvc.list(req.actor.companyId, {
+      assigneeAgentId: req.actor.agentId,
+      status: "todo,in_progress,blocked", // NOTE(review): presumably parsed by issueService.list as a comma-separated status filter — confirm
+    });
+
+    res.json(
+      rows.map((issue) => ({ // respond with only the summary fields listed here, not the full issue rows
+        id: issue.id,
+        identifier: issue.identifier,
+        title: issue.title,
+        status: issue.status,
+        priority: issue.priority,
+        projectId: issue.projectId,
+        goalId: issue.goalId,
+        parentId: issue.parentId,
+        updatedAt: issue.updatedAt,
+        activeRun: issue.activeRun,
+      })),
+    );
+  });
+
  router.get("/agents/:id", async (req, res) => {
    const id = req.params.id as string;
    const agent = await svc.getById(id);
@@ -1275,6 +1303,7 @@ export function agentRoutes(db: Db) {
      contextSnapshot: {
        triggeredBy: req.actor.type,
        actorId: req.actor.type === "agent" ? req.actor.agentId : req.actor.userId,
+        forceFreshSession: req.body.forceFreshSession === true,
      },
    });

--- a/server/src/routes/issues.ts
+++ b/server/src/routes/issues.ts
@@ -31,6 +31,8 @@ import { assertCompanyAccess, getActorInfo } from "./authz.js";
 import { shouldWakeAssigneeOnCheckout } from "./issues-checkout-wakeup.js";
 import { isAllowedContentType, MAX_ATTACHMENT_BYTES } from "../attachment-types.js";

+const MAX_ISSUE_COMMENT_LIMIT = 500; // Ceiling applied to the `limit` query parameter when listing an issue's comments.
+
 export function issueRoutes(db: Db, storage: StorageService) {
  const router = Router();
  const svc = issueService(db);
@@ -320,6 +322,79 @@ export function issueRoutes(db: Db, storage: StorageService) {
    });
  });

+  router.get("/issues/:id/heartbeat-context", async (req, res) => { // Returns a trimmed snapshot of an issue plus its ancestors, project, goal, comment cursor, and an optional wake comment.
+    const id = req.params.id as string;
+    const issue = await svc.getById(id);
+    if (!issue) {
+      res.status(404).json({ error: "Issue not found" });
+      return;
+    }
+    assertCompanyAccess(req, issue.companyId); // access is checked before any related data is loaded
+
+    const wakeCommentId = // optional ?wakeCommentId=...; blank or non-string values are treated as absent
+      typeof req.query.wakeCommentId === "string" && req.query.wakeCommentId.trim().length > 0
+        ? req.query.wakeCommentId.trim()
+        : null;
+
+    const [ancestors, project, goal, commentCursor, wakeComment] = await Promise.all([ // the lookups are independent, so they are issued together
+      svc.getAncestors(issue.id),
+      issue.projectId ? projectsSvc.getById(issue.projectId) : null,
+      issue.goalId
+        ? goalsSvc.getById(issue.goalId)
+        : !issue.projectId // neither a goal nor a project: fall back to the company's default goal
+          ? goalsSvc.getDefaultCompanyGoal(issue.companyId)
+          : null,
+      svc.getCommentCursor(issue.id),
+      wakeCommentId ? svc.getComment(wakeCommentId) : null,
+    ]);
+
+    res.json({
+      issue: {
+        id: issue.id,
+        identifier: issue.identifier,
+        title: issue.title,
+        description: issue.description,
+        status: issue.status,
+        priority: issue.priority,
+        projectId: issue.projectId,
+        goalId: goal?.id ?? issue.goalId, // reports the resolved goal's id, which may be the default company goal
+        parentId: issue.parentId,
+        assigneeAgentId: issue.assigneeAgentId,
+        assigneeUserId: issue.assigneeUserId,
+        updatedAt: issue.updatedAt,
+      },
+      ancestors: ancestors.map((ancestor) => ({
+        id: ancestor.id,
+        identifier: ancestor.identifier,
+        title: ancestor.title,
+        status: ancestor.status,
+        priority: ancestor.priority,
+      })),
+      project: project
+        ? {
+            id: project.id,
+            name: project.name,
+            status: project.status,
+            targetDate: project.targetDate,
+          }
+        : null,
+      goal: goal
+        ? {
+            id: goal.id,
+            title: goal.title,
+            status: goal.status,
+            level: goal.level,
+            parentId: goal.parentId,
+          }
+        : null,
+      commentCursor,
+      wakeComment:
+        wakeComment && wakeComment.issueId === issue.id // only return the wake comment when it belongs to this issue
+          ? wakeComment
+          : null,
+    });
+  });
+
  router.get("/issues/:id/documents", async (req, res) => {
    const id = req.params.id as string;
    const issue = await svc.getById(id);
@@ -937,7 +1012,29 @@ export function issueRoutes(db: Db, storage: StorageService) {
      return;
    }
    assertCompanyAccess(req, issue.companyId);
-    const comments = await svc.listComments(id);
+    const afterCommentId =
+      typeof req.query.after === "string" && req.query.after.trim().length > 0
+        ? req.query.after.trim()
+        : typeof req.query.afterCommentId === "string" && req.query.afterCommentId.trim().length > 0
+          ? req.query.afterCommentId.trim()
+          : null;
+    const order =
+      typeof req.query.order === "string" && req.query.order.trim().toLowerCase() === "asc"
+        ? "asc"
+        : "desc";
+    const limitRaw =
+      typeof req.query.limit === "string" && req.query.limit.trim().length > 0
+        ? Number(req.query.limit)
+        : null;
+    const limit =
+      limitRaw && Number.isFinite(limitRaw) && limitRaw > 0
+        ? Math.min(Math.floor(limitRaw), MAX_ISSUE_COMMENT_LIMIT)
+        : null;
+    const comments = await svc.listComments(id, {
+      afterCommentId,
+      order,
+      limit,
+    });
    res.json(comments);
  });

--- a/server/src/services/heartbeat.ts
+++ b/server/src/services/heartbeat.ts
@@ -18,7 +18,7 @@ import { logger } from "../middleware/logger.js";
 import { publishLiveEvent } from "./live-events.js";
 import { getRunLogStore, type RunLogHandle } from "./run-log-store.js";
 import { getServerAdapter, runningProcesses } from "../adapters/index.js";
-import type { AdapterExecutionResult, AdapterInvocationMeta, AdapterSessionCodec } from "../adapters/index.js";
+import type { AdapterExecutionResult, AdapterInvocationMeta, AdapterSessionCodec, UsageSummary } from "../adapters/index.js";
 import { createLocalAgentJwt } from "../agent-auth-jwt.js";
 import { parseObject, asBoolean, asNumber, appendWithCap, MAX_EXCERPT_BYTES } from "../adapters/utils.js";
 import { costService } from "./costs.js";
@@ -47,6 +47,14 @@ const HEARTBEAT_MAX_CONCURRENT_RUNS_MAX = 10;
 const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
 const startLocksByAgent = new Map<string, Promise<void>>();
 const REPO_ONLY_CWD_SENTINEL = "/__paperclip_repo_only__";
+const SESSIONED_LOCAL_ADAPTERS = new Set([ // Adapter types for which parseSessionCompactionPolicy turns session compaction on by default.
+  "claude_local",
+  "codex_local",
+  "cursor",
+  "gemini_local",
+  "opencode_local",
+  "pi_local",
+]);

 const heartbeatRunListColumns = {
  id: heartbeatRuns.id,
@@ -117,6 +125,26 @@ interface WakeupOptions {
  contextSnapshot?: Record<string, unknown>;
 }

+type UsageTotals = { // Token counters; normalizeUsageTotals and readRawUsageTotals floor each field and clamp it to be non-negative.
+  inputTokens: number;
+  cachedInputTokens: number;
+  outputTokens: number;
+};
+
+type SessionCompactionPolicy = { // Settings produced by parseSessionCompactionPolicy from an agent's runtimeConfig.
+  enabled: boolean; // when not configured, defaults to whether the adapter type is in SESSIONED_LOCAL_ADAPTERS
+  maxSessionRuns: number; // defaults to 200
+  maxRawInputTokens: number; // defaults to 2,000,000
+  maxSessionAgeHours: number; // defaults to 72
+};
+
+type SessionCompactionDecision = { // NOTE(review): the code that produces this type is not visible in this chunk; field meanings are inferred from names only — confirm.
+  rotate: boolean;
+  reason: string | null;
+  handoffMarkdown: string | null;
+  previousRunId: string | null;
+};
+
 interface ParsedIssueAssigneeAdapterOverrides {
  adapterConfig: Record<string, unknown> | null;
  useProjectWorkspace: boolean | null;
@@ -142,6 +170,88 @@ function readNonEmptyString(value: unknown): string | null {
  return typeof value === "string" && value.trim().length > 0 ? value : null;
 }

+function normalizeUsageTotals(usage: UsageSummary | null | undefined): UsageTotals | null { // Converts an adapter usage summary into non-negative integer totals; null or undefined input yields null.
+  if (!usage) return null;
+  return {
+    inputTokens: Math.max(0, Math.floor(asNumber(usage.inputTokens, 0))), // each field: asNumber with fallback 0, then floor, then clamp negatives to 0
+    cachedInputTokens: Math.max(0, Math.floor(asNumber(usage.cachedInputTokens, 0))),
+    outputTokens: Math.max(0, Math.floor(asNumber(usage.outputTokens, 0))),
+  };
+}
+
+function readRawUsageTotals(usageJson: unknown): UsageTotals | null { // Reads token totals from a stored usage JSON value, preferring raw* fields; returns null when nothing usable is present.
+  const parsed = parseObject(usageJson);
+  if (Object.keys(parsed).length === 0) return null; // parseObject produced an object with no keys: nothing was recorded
+
+  const inputTokens = Math.max(
+    0,
+    Math.floor(asNumber(parsed.rawInputTokens, asNumber(parsed.inputTokens, 0))), // rawInputTokens first; inputTokens is the fallback, then 0
+  );
+  const cachedInputTokens = Math.max(
+    0,
+    Math.floor(asNumber(parsed.rawCachedInputTokens, asNumber(parsed.cachedInputTokens, 0))),
+  );
+  const outputTokens = Math.max(
+    0,
+    Math.floor(asNumber(parsed.rawOutputTokens, asNumber(parsed.outputTokens, 0))),
+  );
+
+  if (inputTokens <= 0 && cachedInputTokens <= 0 && outputTokens <= 0) { // all three counters are zero: reported as "no usage" (null) rather than a zero reading
+    return null;
+  }
+
+  return {
+    inputTokens,
+    cachedInputTokens,
+    outputTokens,
+  };
+}
+
+function deriveNormalizedUsageDelta(current: UsageTotals | null, previous: UsageTotals | null): UsageTotals | null { // Per counter: current minus previous, or current as-is when it is smaller than previous.
+  if (!current) return null;
+  if (!previous) return { ...current }; // no earlier reading: current is returned as a shallow copy
+
+  const inputTokens = current.inputTokens >= previous.inputTokens // NOTE(review): current < previous presumably means the counter restarted, so current itself is taken as the delta — confirm
+    ? current.inputTokens - previous.inputTokens
+    : current.inputTokens;
+  const cachedInputTokens = current.cachedInputTokens >= previous.cachedInputTokens
+    ? current.cachedInputTokens - previous.cachedInputTokens
+    : current.cachedInputTokens;
+  const outputTokens = current.outputTokens >= previous.outputTokens
+    ? current.outputTokens - previous.outputTokens
+    : current.outputTokens;
+
+  return {
+    inputTokens: Math.max(0, inputTokens), // final clamp guards against a negative value in current
+    cachedInputTokens: Math.max(0, cachedInputTokens),
+    outputTokens: Math.max(0, outputTokens),
+  };
+}
+
+function formatCount(value: number | null | undefined) { // Formats a count with en-US digit grouping; anything that is not a finite number renders as "0".
+  if (typeof value !== "number" || !Number.isFinite(value)) return "0"; // covers null, undefined, NaN, and ±Infinity
+  return value.toLocaleString("en-US");
+}
+
+function parseSessionCompactionPolicy(agent: typeof agents.$inferSelect): SessionCompactionPolicy { // Resolves the agent's session compaction settings from runtimeConfig, filling in defaults.
+  const runtimeConfig = parseObject(agent.runtimeConfig);
+  const heartbeat = parseObject(runtimeConfig.heartbeat);
+  const compaction = parseObject(
+    heartbeat.sessionCompaction ?? heartbeat.sessionRotation ?? runtimeConfig.sessionCompaction, // first non-nullish of these three locations is used
+  );
+  const supportsSessions = SESSIONED_LOCAL_ADAPTERS.has(agent.adapterType);
+  const enabled = compaction.enabled === undefined // an unset `enabled` defaults to whether the adapter type is in SESSIONED_LOCAL_ADAPTERS
+    ? supportsSessions
+    : asBoolean(compaction.enabled, supportsSessions);
+
+  return {
+    enabled,
+    maxSessionRuns: Math.max(0, Math.floor(asNumber(compaction.maxSessionRuns, 200))), // limits are floored and clamped to be non-negative; fallbacks are 200 runs, 2,000,000 tokens, 72 hours
+    maxRawInputTokens: Math.max(0, Math.floor(asNumber(compaction.maxRawInputTokens, 2_000_000))),
+    maxSessionAgeHours: Math.max(0, Math.floor(asNumber(compaction.maxSessionAgeHours, 72))),
+  };
+}
+
 export function resolveRuntimeSessionParamsForWorkspace(input: {
  agentId: string;
  previousSessionParams: Record<string, unknown> | null;
@@ -246,29 +356,20 @@ function deriveTaskKey(
 export function shouldResetTaskSessionForWake(
  contextSnapshot: Record<string, unknown> | null | undefined,
 ) {
+  if (contextSnapshot?.forceFreshSession === true) return true;
+
  const wakeReason = readNonEmptyString(contextSnapshot?.wakeReason);
  if (wakeReason === "issue_assigned") return true;
-
-  const wakeSource = readNonEmptyString(contextSnapshot?.wakeSource);
-  if (wakeSource === "timer") return true;
-
-  const wakeTriggerDetail = readNonEmptyString(contextSnapshot?.wakeTriggerDetail);
-  return wakeSource === "on_demand" && wakeTriggerDetail === "manual";
+  return false;
 }

 function describeSessionResetReason(
  contextSnapshot: Record<string, unknown> | null | undefined,
 ) {
+  if (contextSnapshot?.forceFreshSession === true) return "forceFreshSession was requested";
+
  const wakeReason = readNonEmptyString(contextSnapshot?.wakeReason);
  if (wakeReason === "issue_assigned") return "wake reason is issue_assigned";
-
-  const wakeSource = readNonEmptyString(contextSnapshot?.wakeSource);
-  if (wakeSource === "timer") return "wake source is timer";
-
-  const wakeTriggerDetail = readNonEmptyString(contextSnapshot?.wakeTriggerDetail);
-  if (wakeSource === "on_demand" && wakeTriggerDetail === "manual") {
-    return "this is a manual invoke";
-  }
  return null;
 }

@@ -501,6 +602,176 @@ export function heartbeatService(db: Db) {
      .then((rows) => rows[0] ?? null);
  }

+  /**
+   * Fetches the most recent heartbeat run of `agentId` whose `sessionIdAfter`
+   * equals `sessionId`.
+   *
+   * @param agentId - Agent that owns the runs.
+   * @param sessionId - Session id matched against `heartbeatRuns.sessionIdAfter`.
+   * @param opts.excludeRunId - When set, the run with this id is left out of
+   *   the search (used to find the run *before* the current one).
+   * @returns The full run row, or `null` when no run matches.
+   */
+  async function getLatestRunForSession(
+    agentId: string,
+    sessionId: string,
+    opts?: { excludeRunId?: string | null },
+  ) {
+    const conditions = [
+      eq(heartbeatRuns.agentId, agentId),
+      eq(heartbeatRuns.sessionIdAfter, sessionId),
+    ];
+    if (opts?.excludeRunId) {
+      conditions.push(sql`${heartbeatRuns.id} <> ${opts.excludeRunId}`);
+    }
+    return db
+      .select()
+      .from(heartbeatRuns)
+      .where(and(...conditions))
+      // Tie-break on id so runs sharing a createdAt resolve deterministically,
+      // mirroring the (createdAt, id) ordering used by getOldestRunForSession.
+      .orderBy(desc(heartbeatRuns.createdAt), desc(heartbeatRuns.id))
+      .limit(1)
+      .then((rows) => rows[0] ?? null);
+  }
+
+  /**
+   * Looks up the earliest heartbeat run recorded for a session.
+   *
+   * Rows are ordered by `createdAt` then `id`, both ascending, and only the
+   * first one is read.
+   *
+   * @returns `{ id, createdAt }` of that run, or `null` when the session has none.
+   */
+  async function getOldestRunForSession(agentId: string, sessionId: string) {
+    const sessionFilter = and(
+      eq(heartbeatRuns.agentId, agentId),
+      eq(heartbeatRuns.sessionIdAfter, sessionId),
+    );
+    const earliestRows = await db
+      .select({ id: heartbeatRuns.id, createdAt: heartbeatRuns.createdAt })
+      .from(heartbeatRuns)
+      .where(sessionFilter)
+      .orderBy(asc(heartbeatRuns.createdAt), asc(heartbeatRuns.id))
+      .limit(1);
+    return earliestRows[0] ?? null;
+  }
+
+  /**
+   * Converts the raw usage reported for a run into per-run usage.
+   *
+   * Without a session id or without raw usage, the raw usage is passed through
+   * unchanged. Otherwise the latest other run of the same session is loaded
+   * and `deriveNormalizedUsageDelta` is applied to the current and previous
+   * raw totals.
+   *
+   * @returns `normalizedUsage`, the `previousRawUsage` that was used (or `null`),
+   *   and `derivedFromSessionTotals`, which is true only when previous raw
+   *   usage was found.
+   */
+  async function resolveNormalizedUsageForSession(input: {
+    agentId: string;
+    runId: string;
+    sessionId: string | null;
+    rawUsage: UsageTotals | null;
+  }) {
+    const { sessionId: activeSessionId, rawUsage: currentTotals } = input;
+
+    if (activeSessionId && currentTotals) {
+      const priorRun = await getLatestRunForSession(input.agentId, activeSessionId, {
+        excludeRunId: input.runId,
+      });
+      const priorTotals = readRawUsageTotals(priorRun?.usageJson);
+      const usageDelta = deriveNormalizedUsageDelta(currentTotals, priorTotals);
+      return {
+        normalizedUsage: usageDelta,
+        previousRawUsage: priorTotals,
+        derivedFromSessionTotals: priorTotals !== null,
+      };
+    }
+
+    // Nothing to diff against: hand the raw figures back untouched.
+    return {
+      normalizedUsage: currentTotals,
+      previousRawUsage: null as UsageTotals | null,
+      derivedFromSessionTotals: false,
+    };
+  }
+
+  /**
+   * Decides whether the given session should be rotated (replaced by a fresh
+   * one) and, if so, builds the handoff note for the new session.
+   *
+   * Returns `rotate: false` when there is no session id, the compaction policy
+   * is disabled, the session has no recorded runs, or no threshold is reached.
+   * Thresholds are checked in this order and the first hit supplies `reason`:
+   * run count, raw input tokens of the latest run, then session age. A
+   * threshold of 0 switches that check off.
+   *
+   * @param input.agent - Agent row; its policy comes from `parseSessionCompactionPolicy`.
+   * @param input.sessionId - Session to evaluate, matched against `heartbeatRuns.sessionIdAfter`.
+   * @param input.issueId - Optional issue id; only echoed into the handoff note.
+   * @returns The decision; `previousRunId` is the session's latest run when one was found.
+   */
+  async function evaluateSessionCompaction(input: {
+    agent: typeof agents.$inferSelect;
+    sessionId: string | null;
+    issueId: string | null;
+  }): Promise<SessionCompactionDecision> {
+    const { agent, sessionId, issueId } = input;
+    if (!sessionId) {
+      return {
+        rotate: false,
+        reason: null,
+        handoffMarkdown: null,
+        previousRunId: null,
+      };
+    }
+
+    const policy = parseSessionCompactionPolicy(agent);
+    if (!policy.enabled) {
+      return {
+        rotate: false,
+        reason: null,
+        handoffMarkdown: null,
+        previousRunId: null,
+      };
+    }
+
+    // Fetch one row more than the run limit so "exceeded" is detectable; never fewer than 4 rows.
+    const fetchLimit = Math.max(policy.maxSessionRuns > 0 ? policy.maxSessionRuns + 1 : 0, 4);
+    const runs = await db
+      .select({
+        id: heartbeatRuns.id,
+        createdAt: heartbeatRuns.createdAt,
+        usageJson: heartbeatRuns.usageJson,
+        resultJson: heartbeatRuns.resultJson,
+        error: heartbeatRuns.error,
+      })
+      .from(heartbeatRuns)
+      .where(and(eq(heartbeatRuns.agentId, agent.id), eq(heartbeatRuns.sessionIdAfter, sessionId)))
+      .orderBy(desc(heartbeatRuns.createdAt))
+      .limit(fetchLimit);
+
+    if (runs.length === 0) {
+      return {
+        rotate: false,
+        reason: null,
+        handoffMarkdown: null,
+        previousRunId: null,
+      };
+    }
+
+    const latestRun = runs[0] ?? null;
+    // With an age limit the oldest run is queried separately (the page above is
+    // capped at fetchLimit rows); otherwise the oldest fetched row is used.
+    const oldestRun =
+      policy.maxSessionAgeHours > 0
+        ? await getOldestRunForSession(agent.id, sessionId)
+        : runs[runs.length - 1] ?? latestRun;
+    const latestRawUsage = readRawUsageTotals(latestRun?.usageJson);
+    // Age is the span between the oldest and latest run timestamps (not the
+    // time elapsed until now), expressed in hours and floored at 0.
+    const sessionAgeHours =
+      latestRun && oldestRun
+        ? Math.max(
+            0,
+            (new Date(latestRun.createdAt).getTime() - new Date(oldestRun.createdAt).getTime()) / (1000 * 60 * 60),
+          )
+        : 0;
+
+    let reason: string | null = null;
+    if (policy.maxSessionRuns > 0 && runs.length > policy.maxSessionRuns) {
+      reason = `session exceeded ${policy.maxSessionRuns} runs`;
+    } else if (
+      policy.maxRawInputTokens > 0 &&
+      latestRawUsage &&
+      latestRawUsage.inputTokens >= policy.maxRawInputTokens
+    ) {
+      reason =
+        `session raw input reached ${formatCount(latestRawUsage.inputTokens)} tokens ` +
+        `(threshold ${formatCount(policy.maxRawInputTokens)})`;
+    } else if (policy.maxSessionAgeHours > 0 && sessionAgeHours >= policy.maxSessionAgeHours) {
+      reason = `session age reached ${Math.floor(sessionAgeHours)} hours`;
+    }
+
+    if (!reason || !latestRun) {
+      return {
+        rotate: false,
+        reason: null,
+        handoffMarkdown: null,
+        previousRunId: latestRun?.id ?? null,
+      };
+    }
+
+    // Summary text: first non-empty of summary, result, message, then the run's error.
+    const latestSummary = summarizeHeartbeatRunResultJson(latestRun.resultJson);
+    const latestTextSummary =
+      readNonEmptyString(latestSummary?.summary) ??
+      readNonEmptyString(latestSummary?.result) ??
+      readNonEmptyString(latestSummary?.message) ??
+      readNonEmptyString(latestRun.error);
+
+    // Optional lines are emitted as "" and then removed by filter(Boolean).
+    const handoffMarkdown = [
+      "Paperclip session handoff:",
+      `- Previous session: ${sessionId}`,
+      issueId ? `- Issue: ${issueId}` : "",
+      `- Rotation reason: ${reason}`,
+      latestTextSummary ? `- Last run summary: ${latestTextSummary}` : "",
+      "Continue from the current task state. Rebuild only the minimum context you need.",
+    ]
+      .filter(Boolean)
+      .join("\n");
+
+    return {
+      rotate: true,
+      reason,
+      handoffMarkdown,
+      previousRunId: latestRun.id,
+    };
+  }
+
  async function resolveSessionBeforeForWakeup(
    agent: typeof agents.$inferSelect,
    taskKey: string | null,
@@ -1016,9 +1287,10 @@ export function heartbeatService(db: Db) {
    run: typeof heartbeatRuns.$inferSelect,
    result: AdapterExecutionResult,
    session: { legacySessionId: string | null },
+    normalizedUsage?: UsageTotals | null,
  ) {
    await ensureRuntimeState(agent);
-    const usage = result.usage;
+    const usage = normalizedUsage ?? normalizeUsageTotals(result.usage);
    const inputTokens = usage?.inputTokens ?? 0;
    const outputTokens = usage?.outputTokens ?? 0;
    const cachedInputTokens = usage?.cachedInputTokens ?? 0;
@@ -1270,15 +1542,42 @@ export function heartbeatService(db: Db) {
      context.projectId = executionWorkspace.projectId;
    }
    const runtimeSessionFallback = taskKey || resetTaskSession ? null : runtime.sessionId;
-    const previousSessionDisplayId = truncateDisplayId(
+    let previousSessionDisplayId = truncateDisplayId(
      taskSessionForRun?.sessionDisplayId ??
        (sessionCodec.getDisplayId ? sessionCodec.getDisplayId(runtimeSessionParams) : null) ??
        readNonEmptyString(runtimeSessionParams?.sessionId) ??
        runtimeSessionFallback,
    );
+    let runtimeSessionIdForAdapter =
+      readNonEmptyString(runtimeSessionParams?.sessionId) ?? runtimeSessionFallback;
+    let runtimeSessionParamsForAdapter = runtimeSessionParams;
+
+    const sessionCompaction = await evaluateSessionCompaction({
+      agent,
+      sessionId: previousSessionDisplayId ?? runtimeSessionIdForAdapter,
+      issueId,
+    });
+    if (sessionCompaction.rotate) {
+      context.paperclipSessionHandoffMarkdown = sessionCompaction.handoffMarkdown;
+      context.paperclipSessionRotationReason = sessionCompaction.reason;
+      context.paperclipPreviousSessionId = previousSessionDisplayId ?? runtimeSessionIdForAdapter;
+      runtimeSessionIdForAdapter = null;
+      runtimeSessionParamsForAdapter = null;
+      previousSessionDisplayId = null;
+      if (sessionCompaction.reason) {
+        runtimeWorkspaceWarnings.push(
+          `Starting a fresh session because ${sessionCompaction.reason}.`,
+        );
+      }
+    } else {
+      delete context.paperclipSessionHandoffMarkdown;
+      delete context.paperclipSessionRotationReason;
+      delete context.paperclipPreviousSessionId;
+    }
+
    const runtimeForAdapter = {
-      sessionId: readNonEmptyString(runtimeSessionParams?.sessionId) ?? runtimeSessionFallback,
-      sessionParams: runtimeSessionParams,
+      sessionId: runtimeSessionIdForAdapter,
+      sessionParams: runtimeSessionParamsForAdapter,
      sessionDisplayId: previousSessionDisplayId,
      taskKey,
    };
@@ -1522,6 +1821,14 @@ export function heartbeatService(db: Db) {
        previousDisplayId: runtimeForAdapter.sessionDisplayId,
        previousLegacySessionId: runtimeForAdapter.sessionId,
      });
+      const rawUsage = normalizeUsageTotals(adapterResult.usage);
+      const sessionUsageResolution = await resolveNormalizedUsageForSession({
+        agentId: agent.id,
+        runId: run.id,
+        sessionId: nextSessionState.displayId ?? nextSessionState.legacySessionId,
+        rawUsage,
+      });
+      const normalizedUsage = sessionUsageResolution.normalizedUsage;

      let outcome: "succeeded" | "failed" | "cancelled" | "timed_out";
      const latestRun = await getRun(run.id);
@@ -1550,9 +1857,23 @@ export function heartbeatService(db: Db) {
              : "failed";

      const usageJson =
-        adapterResult.usage || adapterResult.costUsd != null
+        normalizedUsage || adapterResult.costUsd != null
          ? ({
-              ...(adapterResult.usage ?? {}),
+              ...(normalizedUsage ?? {}),
+              ...(rawUsage ? {
+                rawInputTokens: rawUsage.inputTokens,
+                rawCachedInputTokens: rawUsage.cachedInputTokens,
+                rawOutputTokens: rawUsage.outputTokens,
+              } : {}),
+              ...(sessionUsageResolution.derivedFromSessionTotals ? { usageSource: "session_delta" } : {}),
+              ...((nextSessionState.displayId ?? nextSessionState.legacySessionId)
+                ? { persistedSessionId: nextSessionState.displayId ?? nextSessionState.legacySessionId }
+                : {}),
+              sessionReused: runtimeForAdapter.sessionId != null || runtimeForAdapter.sessionDisplayId != null,
+              taskSessionReused: taskSessionForRun != null,
+              freshSession: runtimeForAdapter.sessionId == null && runtimeForAdapter.sessionDisplayId == null,
+              sessionRotated: sessionCompaction.rotate,
+              sessionRotationReason: sessionCompaction.reason,
              ...(adapterResult.costUsd != null ? { costUsd: adapterResult.costUsd } : {}),
              ...(adapterResult.billingType ? { billingType: adapterResult.billingType } : {}),
            } as Record<string, unknown>)
@@ -1609,7 +1930,7 @@ export function heartbeatService(db: Db) {
      if (finalizedRun) {
        await updateRuntimeState(agent, finalizedRun, adapterResult, {
          legacySessionId: nextSessionState.legacySessionId,
-        });
+        }, normalizedUsage);
        if (taskKey) {
          if (adapterResult.clearSession || (!nextSessionState.params && !nextSessionState.displayId)) {
            await clearTaskSessions(agent.companyId, agent.id, {
--- a/server/src/services/issues.ts
+++ b/server/src/services/issues.ts
@@ -27,6 +27,7 @@ import { resolveIssueGoalId, resolveNextIssueGoalId } from "./issue-goal-fallbac
 import { getDefaultCompanyGoal } from "./goals.js";

 const ALL_ISSUE_STATUSES = ["backlog", "todo", "in_progress", "in_review", "blocked", "done", "cancelled"];
+const MAX_ISSUE_COMMENT_PAGE_LIMIT = 500;

 function assertTransition(from: string, to: string) {
  if (from === to) return;
@@ -1060,13 +1061,86 @@ export function issueService(db: Db) {
        .returning()
        .then((rows) => rows[0] ?? null),

-    listComments: (issueId: string) =>
-      db
+    /**
+     * Lists an issue's comments, each passed through `redactIssueComment`,
+     * ordered by `createdAt` and then `id`.
+     *
+     * @param issueId - Issue whose comments are listed.
+     * @param opts.afterCommentId - Keyset anchor: only comments that sort
+     *   strictly after this comment in the requested order are returned. Blank
+     *   values are ignored; an id that does not belong to the issue yields `[]`.
+     * @param opts.order - `"asc"` for oldest first; anything else means `"desc"`.
+     * @param opts.limit - Positive page size, floored and clamped to
+     *   `[1, MAX_ISSUE_COMMENT_PAGE_LIMIT]`. Missing or non-positive means no limit.
+     */
+    listComments: async (
+      issueId: string,
+      opts?: {
+        afterCommentId?: string | null;
+        order?: "asc" | "desc";
+        limit?: number | null;
+      },
+    ) => {
+      const order = opts?.order === "asc" ? "asc" : "desc";
+      const afterCommentId = opts?.afterCommentId?.trim() || null;
+      // Clamp to at least 1: a fractional limit such as 0.5 would otherwise
+      // floor to 0 and silently turn into an unbounded query below.
+      const limit =
+        opts?.limit && opts.limit > 0
+          ? Math.min(Math.max(1, Math.floor(opts.limit)), MAX_ISSUE_COMMENT_PAGE_LIMIT)
+          : null;
+
+      const conditions = [eq(issueComments.issueId, issueId)];
+      if (afterCommentId) {
+        const anchor = await db
+          .select({
+            id: issueComments.id,
+            createdAt: issueComments.createdAt,
+          })
+          .from(issueComments)
+          .where(and(eq(issueComments.issueId, issueId), eq(issueComments.id, afterCommentId)))
+          .then((rows) => rows[0] ?? null);
+
+        if (!anchor) return [];
+        // Keyset comparison on (createdAt, id) so comments sharing a timestamp
+        // are neither skipped nor repeated across pages.
+        conditions.push(
+          order === "asc"
+            ? sql<boolean>`(
+                ${issueComments.createdAt} > ${anchor.createdAt}
+                OR (${issueComments.createdAt} = ${anchor.createdAt} AND ${issueComments.id} > ${anchor.id})
+              )`
+            : sql<boolean>`(
+                ${issueComments.createdAt} < ${anchor.createdAt}
+                OR (${issueComments.createdAt} = ${anchor.createdAt} AND ${issueComments.id} < ${anchor.id})
+              )`,
+        );
+      }
+
+      const query = db
        .select()
        .from(issueComments)
-        .where(eq(issueComments.issueId, issueId))
-        .orderBy(desc(issueComments.createdAt))
-        .then((comments) => comments.map(redactIssueComment)),
+        .where(and(...conditions))
+        .orderBy(
+          order === "asc" ? asc(issueComments.createdAt) : desc(issueComments.createdAt),
+          order === "asc" ? asc(issueComments.id) : desc(issueComments.id),
+        );
+
+      const comments = limit !== null ? await query.limit(limit) : await query;
+      return comments.map(redactIssueComment);
+    },
+
+    /**
+     * Summarizes an issue's comment thread without loading it: the total
+     * number of comments plus the id and timestamp of the newest one
+     * (newest by `createdAt`, then `id`). Both lookups run concurrently.
+     *
+     * @returns `totalComments` (0 when none) and `latestCommentId` /
+     *   `latestCommentAt`, which are `null` when the issue has no comments.
+     */
+    getCommentCursor: async (issueId: string) => {
+      const newestCommentLookup = db
+        .select({
+          latestCommentId: issueComments.id,
+          latestCommentAt: issueComments.createdAt,
+        })
+        .from(issueComments)
+        .where(eq(issueComments.issueId, issueId))
+        .orderBy(desc(issueComments.createdAt), desc(issueComments.id))
+        .limit(1)
+        .then((rows) => rows[0] ?? null);
+
+      const commentCountLookup = db
+        .select({
+          totalComments: sql<number>`count(*)::int`,
+        })
+        .from(issueComments)
+        .where(eq(issueComments.issueId, issueId))
+        .then((rows) => rows[0] ?? null);
+
+      const [newest, counted] = await Promise.all([newestCommentLookup, commentCountLookup]);
+
+      return {
+        totalComments: Number(counted?.totalComments ?? 0),
+        latestCommentId: newest?.latestCommentId ?? null,
+        latestCommentAt: newest?.latestCommentAt ?? null,
+      };
+    },

    getComment: (commentId: string) =>
      db
--- a/server/src/ui-branding.ts
+++ b/server/src/ui-branding.ts
@@ -1,5 +1,7 @@
 const FAVICON_BLOCK_START = "<!-- PAPERCLIP_FAVICON_START -->";
 const FAVICON_BLOCK_END = "<!-- PAPERCLIP_FAVICON_END -->";
+const RUNTIME_BRANDING_BLOCK_START = "<!-- PAPERCLIP_RUNTIME_BRANDING_START -->";
+const RUNTIME_BRANDING_BLOCK_END = "<!-- PAPERCLIP_RUNTIME_BRANDING_END -->";

 const DEFAULT_FAVICON_LINKS = [
  '<link rel="icon" href="/favicon.ico" sizes="48x48" />',
@@ -8,12 +10,13 @@ const DEFAULT_FAVICON_LINKS = [
  '<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png" />',
 ].join("\n");

-const WORKTREE_FAVICON_LINKS = [
-  '<link rel="icon" href="/worktree-favicon.ico" sizes="48x48" />',
-  '<link rel="icon" href="/worktree-favicon.svg" type="image/svg+xml" />',
-  '<link rel="icon" type="image/png" sizes="32x32" href="/worktree-favicon-32x32.png" />',
-  '<link rel="icon" type="image/png" sizes="16x16" href="/worktree-favicon-16x16.png" />',
-].join("\n");
+/**
+ * Branding values for a worktree instance, as built by `getWorktreeUiBranding`.
+ * When `enabled` is false that function sets every other field to `null`.
+ */
+export type WorktreeUiBranding = {
+  // True when worktree branding applies (PAPERCLIP_IN_WORKTREE is truthy).
+  enabled: boolean;
+  // Display name of the worktree.
+  name: string | null;
+  // Background color as lowercase `#rrggbb`.
+  color: string | null;
+  // Text color picked for contrast against `color`.
+  textColor: string | null;
+  // SVG `data:` URL used as the favicon.
+  faviconHref: string | null;
+};

 function isTruthyEnvValue(value: string | undefined): boolean {
  if (!value) return false;
@@ -21,21 +24,194 @@ function isTruthyEnvValue(value: string | undefined): boolean {
  return normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on";
 }

+/** Trims `value` and returns it, or `null` when it is not a string or is blank. */
+function nonEmpty(value: string | undefined): string | null {
+  const trimmed = typeof value === "string" ? value.trim() : "";
+  return trimmed === "" ? null : trimmed;
+}
+
+/**
+ * Normalizes a 3- or 6-digit hex color (leading `#` optional) to lowercase
+ * `#rrggbb`. Three-digit input is expanded by doubling each digit.
+ *
+ * @returns The normalized color, or `null` for blank or malformed input.
+ */
+function normalizeHexColor(value: string | undefined): string | null {
+  const candidate = nonEmpty(value);
+  if (candidate === null) return null;
+
+  const digits = candidate.replace(/^#/, "");
+  if (!/^[0-9a-fA-F]+$/.test(digits)) return null;
+
+  switch (digits.length) {
+    case 3:
+      return `#${Array.from(digits, (digit) => digit + digit).join("").toLowerCase()}`;
+    case 6:
+      return `#${digits.toLowerCase()}`;
+    default:
+      return null;
+  }
+}
+
+/**
+ * Formats one color channel as two lowercase hex digits. The value is clamped
+ * to 0..255 and rounded to the nearest integer first.
+ */
+function hslComponentToHex(n: number): string {
+  const clamped = Math.min(255, Math.max(0, n));
+  const hexDigits = Math.round(clamped).toString(16);
+  return hexDigits.padStart(2, "0");
+}
+
+/**
+ * Converts an HSL color to `#rrggbb`.
+ *
+ * @param hue - Degrees; any value is wrapped into [0, 360).
+ * @param saturation - Percent, clamped to 0..100.
+ * @param lightness - Percent, clamped to 0..100.
+ */
+function hslToHex(hue: number, saturation: number, lightness: number): string {
+  const sat = Math.max(0, Math.min(100, saturation)) / 100;
+  const light = Math.max(0, Math.min(100, lightness)) / 100;
+  const chroma = (1 - Math.abs((2 * light) - 1)) * sat;
+  const wrappedHue = ((hue % 360) + 360) % 360;
+  const secondary = chroma * (1 - Math.abs(((wrappedHue / 60) % 2) - 1));
+  const offset = light - (chroma / 2);
+
+  // One (r, g, b) triple per 60-degree sector of the hue wheel.
+  const sectorChannels: [number, number, number] =
+    wrappedHue < 60 ? [chroma, secondary, 0]
+    : wrappedHue < 120 ? [secondary, chroma, 0]
+    : wrappedHue < 180 ? [0, chroma, secondary]
+    : wrappedHue < 240 ? [0, secondary, chroma]
+    : wrappedHue < 300 ? [secondary, 0, chroma]
+    : [chroma, 0, secondary];
+
+  const hexPairs = sectorChannels.map((channel) => hslComponentToHex((channel + offset) * 255));
+  return `#${hexPairs.join("")}`;
+}
+
+/**
+ * Derives a stable color from `seed`: a multiply-by-33 rolling hash over the
+ * string (kept as an unsigned 32-bit value) picks the hue, with saturation
+ * fixed at 68 and lightness at 56.
+ */
+function deriveColorFromSeed(seed: string): string {
+  const seedHash = Array.from(seed).reduce(
+    (acc, symbol) => ((acc * 33) + symbol.charCodeAt(0)) >>> 0,
+    0,
+  );
+  return hslToHex(seedHash % 360, 68, 56);
+}
+
+/**
+ * Splits a hex color into numeric red/green/blue channels. Input that
+ * `normalizeHexColor` rejects is treated as `#000000`.
+ */
+function hexToRgb(color: string): { r: number; g: number; b: number } {
+  const hex = normalizeHexColor(color) ?? "#000000";
+  const channelAt = (offset: number): number => Number.parseInt(hex.slice(offset, offset + 2), 16);
+  return { r: channelAt(1), g: channelAt(3), b: channelAt(5) };
+}
+
+/**
+ * Linearizes one 0..255 color channel for the relative-luminance formula:
+ * a linear segment at or below 0.03928, a 2.4-power curve above it.
+ */
+function relativeLuminanceChannel(value: number): number {
+  const fraction = value / 255;
+  if (fraction <= 0.03928) {
+    return fraction / 12.92;
+  }
+  return Math.pow((fraction + 0.055) / 1.055, 2.4);
+}
+
+/**
+ * Computes the relative luminance of a hex color as the weighted sum of its
+ * linearized channels (0.2126 red, 0.7152 green, 0.0722 blue).
+ */
+function relativeLuminance(color: string): number {
+  const rgb = hexToRgb(color);
+  const redPart = 0.2126 * relativeLuminanceChannel(rgb.r);
+  const greenPart = 0.7152 * relativeLuminanceChannel(rgb.g);
+  const bluePart = 0.0722 * relativeLuminanceChannel(rgb.b);
+  return redPart + greenPart + bluePart;
+}
+
+/**
+ * Chooses the text color for `background`: compares the contrast ratio of
+ * white and of black against it and returns the light color (`#f8fafc`) when
+ * white contrasts at least as well, otherwise the dark one (`#111827`).
+ */
+function pickReadableTextColor(background: string): string {
+  const luminance = relativeLuminance(background);
+  const contrastWithWhite = 1.05 / (luminance + 0.05);
+  const contrastWithBlack = (luminance + 0.05) / 0.05;
+  if (contrastWithWhite >= contrastWithBlack) {
+    return "#f8fafc";
+  }
+  return "#111827";
+}
+
+/**
+ * Escapes `&`, `"`, `<` and `>` so `value` can be placed inside a
+ * double-quoted HTML attribute.
+ */
+function escapeHtmlAttribute(value: string): string {
+  return value.replace(/[&"<>]/g, (symbol) => {
+    switch (symbol) {
+      case "&":
+        return "&amp;";
+      case '"':
+        return "&quot;";
+      case "<":
+        return "&lt;";
+      default:
+        return "&gt;";
+    }
+  });
+}
+
+/**
+ * Builds an SVG favicon as a `data:image/svg+xml` URL: a rounded square
+ * filled with `background` and a paperclip path stroked in `foreground`.
+ * The SVG markup is URI-encoded with `encodeURIComponent`.
+ */
+function createFaviconDataUrl(background: string, foreground: string): string {
+  const svgOpen = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none">';
+  const tile = `<rect width="24" height="24" rx="6" fill="${background}"/>`;
+  const clip = `<path stroke="${foreground}" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.15" d="m16 6-8.414 8.586a2 2 0 0 0 2.829 2.829l8.414-8.586a4 4 0 1 0-5.657-5.657l-8.379 8.551a6 6 0 1 0 8.485 8.485l8.379-8.551"/>`;
+  const markup = `${svgOpen}${tile}${clip}</svg>`;
+  return "data:image/svg+xml," + encodeURIComponent(markup);
+}
+
+/** Reports whether `PAPERCLIP_IN_WORKTREE` in `env` is truthy according to `isTruthyEnvValue`. */
 export function isWorktreeUiBrandingEnabled(env: NodeJS.ProcessEnv = process.env): boolean {
  return isTruthyEnvValue(env.PAPERCLIP_IN_WORKTREE);
 }

-export function renderFaviconLinks(worktree: boolean): string {
-  return worktree ? WORKTREE_FAVICON_LINKS : DEFAULT_FAVICON_LINKS;
+/**
+ * Resolves the worktree branding from environment variables.
+ *
+ * When branding is enabled: the name is `PAPERCLIP_WORKTREE_NAME`, else
+ * `PAPERCLIP_INSTANCE_ID`, else `"worktree"`; the color is
+ * `PAPERCLIP_WORKTREE_COLOR` if it is a valid hex color, else a color derived
+ * from the name; text color and favicon follow from the color.
+ * When disabled, every field other than `enabled` is `null`.
+ */
+export function getWorktreeUiBranding(env: NodeJS.ProcessEnv = process.env): WorktreeUiBranding {
+  if (isWorktreeUiBrandingEnabled(env)) {
+    const name = nonEmpty(env.PAPERCLIP_WORKTREE_NAME) ?? nonEmpty(env.PAPERCLIP_INSTANCE_ID) ?? "worktree";
+    const background = normalizeHexColor(env.PAPERCLIP_WORKTREE_COLOR) ?? deriveColorFromSeed(name);
+    const foreground = pickReadableTextColor(background);
+    return {
+      enabled: true,
+      name,
+      color: background,
+      textColor: foreground,
+      faviconHref: createFaviconDataUrl(background, foreground),
+    };
+  }
+
+  return {
+    enabled: false,
+    name: null,
+    color: null,
+    textColor: null,
+    faviconHref: null,
+  };
+}
+
+/**
+ * Renders the favicon `<link>` tags. With enabled branding that carries a
+ * favicon href, two links to that (attribute-escaped) href are produced;
+ * otherwise the default favicon links are returned.
+ */
+export function renderFaviconLinks(branding: WorktreeUiBranding): string {
+  const { enabled, faviconHref } = branding;
+  if (enabled && faviconHref) {
+    const safeHref = escapeHtmlAttribute(faviconHref);
+    const iconLink = `<link rel="icon" href="${safeHref}" type="image/svg+xml" sizes="any" />`;
+    const shortcutLink = `<link rel="shortcut icon" href="${safeHref}" type="image/svg+xml" />`;
+    return `${iconLink}\n${shortcutLink}`;
+  }
+  return DEFAULT_FAVICON_LINKS;
+}
+
+/**
+ * Renders the `<meta>` tags that expose worktree branding (enabled flag,
+ * name, color, text color) to the page. Returns an empty string unless
+ * branding is enabled and name, color and text color are all present.
+ */
+export function renderRuntimeBrandingMeta(branding: WorktreeUiBranding): string {
+  const { enabled, name, color, textColor } = branding;
+  if (!(enabled && name && color && textColor)) return "";
+
+  const enabledTag = '<meta name="paperclip-worktree-enabled" content="true" />';
+  const nameTag = `<meta name="paperclip-worktree-name" content="${escapeHtmlAttribute(name)}" />`;
+  const colorTag = `<meta name="paperclip-worktree-color" content="${escapeHtmlAttribute(color)}" />`;
+  const textColorTag = `<meta name="paperclip-worktree-text-color" content="${escapeHtmlAttribute(textColor)}" />`;
+  return `${enabledTag}\n${nameTag}\n${colorTag}\n${textColorTag}`;
+}
+
+/**
+ * Replaces whatever sits between `startMarker` and the next `endMarker` with
+ * `content`, keeping both markers in place.
+ *
+ * Each content line is indented by four spaces and the end marker is put on
+ * its own four-space-indented line. Empty `content` leaves only that line
+ * break between the markers.
+ *
+ * @returns The rewritten HTML, or `html` unchanged when the start marker is
+ *   missing or no end marker follows it.
+ */
+function replaceMarkedBlock(html: string, startMarker: string, endMarker: string, content: string): string {
+  const start = html.indexOf(startMarker);
+  if (start === -1) return html;
+
+  // Look for the end marker only after the start marker, so a stray earlier
+  // end marker cannot hide a valid start/end pair.
+  const contentStart = start + startMarker.length;
+  const end = html.indexOf(endMarker, contentStart);
+  if (end === -1) return html;
+
+  const before = html.slice(0, contentStart);
+  const after = html.slice(end);
+  const indentedContent = content
+    ? `\n${content
+      .split("\n")
+      .map((line) => `    ${line}`)
+      .join("\n")}\n    `
+    : "\n    ";
+  return `${before}${indentedContent}${after}`;
 }

+/**
+ * Applies worktree branding to the UI's HTML shell.
+ *
+ * Resolves the branding from `env`, then rewrites two marked blocks in
+ * `html`: the favicon block with `renderFaviconLinks` output and the runtime
+ * branding block with `renderRuntimeBrandingMeta` output. A block whose
+ * markers are missing is left untouched by `replaceMarkedBlock`.
+ */
 export function applyUiBranding(html: string, env: NodeJS.ProcessEnv = process.env): string {
-  const start = html.indexOf(FAVICON_BLOCK_START);
-  const end = html.indexOf(FAVICON_BLOCK_END);
-  if (start === -1 || end === -1 || end < start) return html;
-
-  const before = html.slice(0, start + FAVICON_BLOCK_START.length);
-  const after = html.slice(end);
-  const links = renderFaviconLinks(isWorktreeUiBrandingEnabled(env));
-  return `${before}\n${links}\n    ${after}`;
+  const branding = getWorktreeUiBranding(env);
+  const withFavicon = replaceMarkedBlock(html, FAVICON_BLOCK_START, FAVICON_BLOCK_END, renderFaviconLinks(branding));
+  return replaceMarkedBlock(
+    withFavicon,
+    RUNTIME_BRANDING_BLOCK_START,
+    RUNTIME_BRANDING_BLOCK_END,
+    renderRuntimeBrandingMeta(branding),
+  );
 }
--- a/skills/paperclip/SKILL.md
+++ b/skills/paperclip/SKILL.md
@@ -35,7 +35,7 @@ Follow these steps every time you wake up:
  - add a markdown comment explaining why it remains open and what happens next.
    Always include links to the approval and issue in that comment.

-**Step 3 — Get assignments.** `GET /api/companies/{companyId}/issues?assigneeAgentId={your-agent-id}&status=todo,in_progress,blocked`. Results sorted by priority. This is your inbox.
+**Step 3 — Get assignments.** Prefer `GET /api/agents/me/inbox-lite` for the normal heartbeat inbox. It returns the compact assignment list you need for prioritization. Fall back to `GET /api/companies/{companyId}/issues?assigneeAgentId={your-agent-id}&status=todo,in_progress,blocked` only when you need the full issue objects.

 **Step 4 — Pick work (with mention exception).** Work on `in_progress` first, then `todo`. Skip `blocked` unless you can unblock it.
 **Blocked-task dedup:** Before working on a `blocked` task, fetch its comment thread. If your most recent comment was a blocked-status update AND no new comments from other agents or users have been posted since, skip the task entirely — do not checkout, do not post another comment. Exit the heartbeat (or move to the next task) instead. Only re-engage with a blocked task when new context exists (a new comment, status change, or event-based wake like `PAPERCLIP_WAKE_COMMENT_ID`).
@@ -56,8 +56,15 @@ Headers: Authorization: Bearer $PAPERCLIP_API_KEY, X-Paperclip-Run-Id: $PAPERCLI

 If already checked out by you, returns normally. If owned by another agent: `409 Conflict` — stop, pick a different task. **Never retry a 409.**

-**Step 6 — Understand context.** `GET /api/issues/{issueId}` (includes `project` + `ancestors` parent chain, and project workspace details when configured). `GET /api/issues/{issueId}/comments`. Read ancestors to understand _why_ this task exists.
-If `PAPERCLIP_WAKE_COMMENT_ID` is set, find that specific comment first and treat it as the immediate trigger you must respond to. Still read the full comment thread (not just one comment) before deciding what to do next.
+**Step 6 — Understand context.** Prefer `GET /api/issues/{issueId}/heartbeat-context` first. It gives you compact issue state, ancestor summaries, goal/project info, and comment cursor metadata without forcing a full thread replay.
+
+Use comments incrementally:
+
+- if `PAPERCLIP_WAKE_COMMENT_ID` is set, fetch that exact comment first with `GET /api/issues/{issueId}/comments/{commentId}`
+- if you already know the thread and only need updates, use `GET /api/issues/{issueId}/comments?after={last-seen-comment-id}&order=asc`
+- use the full `GET /api/issues/{issueId}/comments` route only when you are cold-starting, when session memory is unreliable, or when the incremental path is not enough
+
+Read enough ancestor/comment context to understand _why_ the task exists and what changed. Do not reflexively reload the whole thread on every heartbeat.

 **Step 7 — Do the work.** Use your tools and capabilities.

@@ -218,6 +225,7 @@ PATCH /api/agents/{agentId}/instructions-path
 | Action                                | Endpoint                                                                                   |
 | ------------------------------------- | ------------------------------------------------------------------------------------------ |
 | My identity                           | `GET /api/agents/me`                                                                       |
+| My compact inbox                      | `GET /api/agents/me/inbox-lite`                                                            |
 | My assignments                        | `GET /api/companies/:companyId/issues?assigneeAgentId=:id&status=todo,in_progress,blocked` |
 | Checkout task                         | `POST /api/issues/:issueId/checkout`                                                       |
 | Get task + ancestors                  | `GET /api/issues/:issueId`                                                                 |
@@ -225,7 +233,9 @@ PATCH /api/agents/{agentId}/instructions-path
 | Get issue document                    | `GET /api/issues/:issueId/documents/:key`                                                  |
 | Create/update issue document          | `PUT /api/issues/:issueId/documents/:key`                                                  |
 | Get issue document revisions          | `GET /api/issues/:issueId/documents/:key/revisions`                                        |
+| Get compact heartbeat context         | `GET /api/issues/:issueId/heartbeat-context`                                               |
 | Get comments                          | `GET /api/issues/:issueId/comments`                                                        |
+| Get comment delta                     | `GET /api/issues/:issueId/comments?after=:commentId&order=asc`                             |
 | Get specific comment                  | `GET /api/issues/:issueId/comments/:commentId`                                             |
 | Update task                           | `PATCH /api/issues/:issueId` (optional `comment` field)                                    |
 | Add comment                           | `POST /api/issues/:issueId/comments`                                                       |
--- a/ui/index.html
+++ b/ui/index.html
@@ -8,6 +8,8 @@
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
    <meta name="apple-mobile-web-app-title" content="Paperclip" />
    <title>Paperclip</title>
+    <!-- PAPERCLIP_RUNTIME_BRANDING_START -->
+    <!-- PAPERCLIP_RUNTIME_BRANDING_END -->
    <!-- PAPERCLIP_FAVICON_START -->
    <link rel="icon" href="/favicon.ico" sizes="48x48" />
    <link rel="icon" href="/favicon.svg" type="image/svg+xml" />
--- a/ui/src/components/AgentConfigForm.tsx
+++ b/ui/src/components/AgentConfigForm.tsx
@@ -444,23 +444,28 @@ export function AgentConfigForm(props: AgentConfigFormProps) {
              />
            </Field>
            {isLocal && (
-              <Field label="Prompt Template" hint={help.promptTemplate}>
-                <MarkdownEditor
-                  value={eff(
-                    "adapterConfig",
-                    "promptTemplate",
-                    String(config.promptTemplate ?? ""),
-                  )}
-                  onChange={(v) => mark("adapterConfig", "promptTemplate", v ?? "")}
-                  placeholder="You are agent {{ agent.name }}. Your role is {{ agent.role }}..."
-                  contentClassName="min-h-[88px] text-sm font-mono"
-                  imageUploadHandler={async (file) => {
-                    const namespace = `agents/${props.agent.id}/prompt-template`;
-                    const asset = await uploadMarkdownImage.mutateAsync({ file, namespace });
-                    return asset.contentPath;
-                  }}
-                />
-              </Field>
+              <>
+                <Field label="Prompt Template" hint={help.promptTemplate}>
+                  <MarkdownEditor
+                    value={eff(
+                      "adapterConfig",
+                      "promptTemplate",
+                      String(config.promptTemplate ?? ""),
+                    )}
+                    onChange={(v) => mark("adapterConfig", "promptTemplate", v ?? "")}
+                    placeholder="You are agent {{ agent.name }}. Your role is {{ agent.role }}..."
+                    contentClassName="min-h-[88px] text-sm font-mono"
+                    imageUploadHandler={async (file) => {
+                      const namespace = `agents/${props.agent.id}/prompt-template`;
+                      const asset = await uploadMarkdownImage.mutateAsync({ file, namespace });
+                      return asset.contentPath;
+                    }}
+                  />
+                </Field>
+                <div className="rounded-md border border-amber-500/25 bg-amber-500/10 px-3 py-2 text-xs text-amber-100">
+                  Prompt template is replayed on every heartbeat. Keep it compact and dynamic to avoid recurring token cost and cache churn.
+                </div>
+              </>
            )}
          </div>
        </div>
@@ -576,19 +581,24 @@ export function AgentConfigForm(props: AgentConfigFormProps) {

          {/* Prompt template (create mode only — edit mode shows this in Identity) */}
          {isLocal && isCreate && (
-            <Field label="Prompt Template" hint={help.promptTemplate}>
-              <MarkdownEditor
-                value={val!.promptTemplate}
-                onChange={(v) => set!({ promptTemplate: v })}
-                placeholder="You are agent {{ agent.name }}. Your role is {{ agent.role }}..."
-                contentClassName="min-h-[88px] text-sm font-mono"
-                imageUploadHandler={async (file) => {
-                  const namespace = "agents/drafts/prompt-template";
-                  const asset = await uploadMarkdownImage.mutateAsync({ file, namespace });
-                  return asset.contentPath;
-                }}
-              />
-            </Field>
+            <>
+              <Field label="Prompt Template" hint={help.promptTemplate}>
+                <MarkdownEditor
+                  value={val!.promptTemplate}
+                  onChange={(v) => set!({ promptTemplate: v })}
+                  placeholder="You are agent {{ agent.name }}. Your role is {{ agent.role }}..."
+                  contentClassName="min-h-[88px] text-sm font-mono"
+                  imageUploadHandler={async (file) => {
+                    const namespace = "agents/drafts/prompt-template";
+                    const asset = await uploadMarkdownImage.mutateAsync({ file, namespace });
+                    return asset.contentPath;
+                  }}
+                />
+              </Field>
+              <div className="rounded-md border border-amber-500/25 bg-amber-500/10 px-3 py-2 text-xs text-amber-100">
+                Prompt template is replayed on every heartbeat. Prefer small task framing and variables like <code>{"{{ context.* }}"}</code> or <code>{"{{ run.* }}"}</code>; avoid repeating stable instructions here.
+              </div>
+            </>
          )}

          {/* Adapter-specific fields */}
@@ -704,6 +714,9 @@ export function AgentConfigForm(props: AgentConfigFormProps) {
                  }}
                />
              </Field>
+              <div className="rounded-md border border-sky-500/25 bg-sky-500/10 px-3 py-2 text-xs text-sky-100">
+                Bootstrap prompt is only sent for fresh sessions. Put stable setup, habits, and longer reusable guidance here. Frequent changes reduce the value of session reuse because new sessions must replay it.
+              </div>
              {adapterType === "claude_local" && (
                <ClaudeLocalAdvancedFields {...adapterFieldProps} />
              )}
--- a/ui/src/components/Layout.tsx
+++ b/ui/src/components/Layout.tsx
@@ -14,6 +14,7 @@ import { NewGoalDialog } from "./NewGoalDialog";
 import { NewAgentDialog } from "./NewAgentDialog";
 import { ToastViewport } from "./ToastViewport";
 import { MobileBottomNav } from "./MobileBottomNav";
+import { WorktreeBanner } from "./WorktreeBanner";
 import { useDialog } from "../context/DialogContext";
 import { usePanel } from "../context/PanelContext";
 import { useCompany } from "../context/CompanyContext";
@@ -223,7 +224,7 @@ export function Layout() {
    <div
      className={cn(
        "bg-background text-foreground pt-[env(safe-area-inset-top)]",
-        isMobile ? "min-h-dvh" : "flex h-dvh overflow-hidden",
+        isMobile ? "min-h-dvh" : "flex h-dvh flex-col overflow-hidden",
      )}
    >
      <a
@@ -232,145 +233,148 @@ export function Layout() {
      >
        Skip to Main Content
      </a>
-      {/* Mobile backdrop */}
-      {isMobile && sidebarOpen && (
-        <button
-          type="button"
-          className="fixed inset-0 z-40 bg-black/50"
-          onClick={() => setSidebarOpen(false)}
-          aria-label="Close sidebar"
-        />
-      )}
+      <WorktreeBanner />
+      <div className={cn("min-h-0 flex-1", isMobile ? "w-full" : "flex overflow-hidden")}>
+        {/* Mobile backdrop */}
+        {isMobile && sidebarOpen && (
+          <button
+            type="button"
+            className="fixed inset-0 z-40 bg-black/50"
+            onClick={() => setSidebarOpen(false)}
+            aria-label="Close sidebar"
+          />
+        )}

-      {/* Combined sidebar area: company rail + inner sidebar + docs bar */}
-      {isMobile ? (
-        <div
-          className={cn(
-            "fixed inset-y-0 left-0 z-50 flex flex-col overflow-hidden pt-[env(safe-area-inset-top)] transition-transform duration-100 ease-out",
-            sidebarOpen ? "translate-x-0" : "-translate-x-full"
-          )}
-        >
-          <div className="flex flex-1 min-h-0 overflow-hidden">
-            <CompanyRail />
-            {isInstanceSettingsRoute ? <InstanceSidebar /> : <Sidebar />}
-          </div>
-          <div className="border-t border-r border-border px-3 py-2 bg-background">
-            <div className="flex items-center gap-1">
-              <a
-                href="https://docs.paperclip.ing/"
-                target="_blank"
-                rel="noopener noreferrer"
-                className="flex items-center gap-2.5 px-3 py-2 text-[13px] font-medium transition-colors text-foreground/80 hover:bg-accent/50 hover:text-foreground flex-1 min-w-0"
-              >
-                <BookOpen className="h-4 w-4 shrink-0" />
-                <span className="truncate">Documentation</span>
-              </a>
-              <Button variant="ghost" size="icon-sm" className="text-muted-foreground shrink-0" asChild>
-                <Link
-                  to="/instance/settings"
-                  aria-label="Instance settings"
-                  title="Instance settings"
-                  onClick={() => {
-                    if (isMobile) setSidebarOpen(false);
-                  }}
-                >
-                  <Settings className="h-4 w-4" />
-                </Link>
-              </Button>
-              <Button
-                type="button"
-                variant="ghost"
-                size="icon-sm"
-                className="text-muted-foreground shrink-0"
-                onClick={toggleTheme}
-                aria-label={`Switch to ${nextTheme} mode`}
-                title={`Switch to ${nextTheme} mode`}
-              >
-                {theme === "dark" ? <Sun className="h-4 w-4" /> : <Moon className="h-4 w-4" />}
-              </Button>
-            </div>
-          </div>
-        </div>
-      ) : (
-        <div className="flex flex-col shrink-0 h-full">
-          <div className="flex flex-1 min-h-0">
-            <CompanyRail />
-            <div
-              className={cn(
-                "overflow-hidden transition-[width] duration-100 ease-out",
-                sidebarOpen ? "w-60" : "w-0"
-              )}
-            >
-              {isInstanceSettingsRoute ? <InstanceSidebar /> : <Sidebar />}
-            </div>
-          </div>
-          <div className="border-t border-r border-border px-3 py-2">
-            <div className="flex items-center gap-1">
-              <a
-                href="https://docs.paperclip.ing/"
-                target="_blank"
-                rel="noopener noreferrer"
-                className="flex items-center gap-2.5 px-3 py-2 text-[13px] font-medium transition-colors text-foreground/80 hover:bg-accent/50 hover:text-foreground flex-1 min-w-0"
-              >
-                <BookOpen className="h-4 w-4 shrink-0" />
-                <span className="truncate">Documentation</span>
-              </a>
-              <Button variant="ghost" size="icon-sm" className="text-muted-foreground shrink-0" asChild>
-                <Link
-                  to="/instance/settings"
-                  aria-label="Instance settings"
-                  title="Instance settings"
-                  onClick={() => {
-                    if (isMobile) setSidebarOpen(false);
-                  }}
-                >
-                  <Settings className="h-4 w-4" />
-                </Link>
-              </Button>
-              <Button
-                type="button"
-                variant="ghost"
-                size="icon-sm"
-                className="text-muted-foreground shrink-0"
-                onClick={toggleTheme}
-                aria-label={`Switch to ${nextTheme} mode`}
-                title={`Switch to ${nextTheme} mode`}
-              >
-                {theme === "dark" ? <Sun className="h-4 w-4" /> : <Moon className="h-4 w-4" />}
-              </Button>
-            </div>
-          </div>
-        </div>
-      )}
-
-      {/* Main content */}
-      <div className={cn("flex min-w-0 flex-col", isMobile ? "w-full" : "h-full flex-1")}>
-        <div
-          className={cn(
-            isMobile && "sticky top-0 z-20 bg-background/95 backdrop-blur supports-[backdrop-filter]:bg-background/85",
-          )}
-        >
-          <BreadcrumbBar />
-        </div>
-        <div className={cn(isMobile ? "block" : "flex flex-1 min-h-0")}>
-          <main
-            id="main-content"
-            tabIndex={-1}
+        {/* Combined sidebar area: company rail + inner sidebar + docs bar */}
+        {isMobile ? (
+          <div
            className={cn(
-              "flex-1 p-4 md:p-6",
-              isMobile ? "overflow-visible pb-[calc(5rem+env(safe-area-inset-bottom))]" : "overflow-auto",
+              "fixed inset-y-0 left-0 z-50 flex flex-col overflow-hidden pt-[env(safe-area-inset-top)] transition-transform duration-100 ease-out",
+              sidebarOpen ? "translate-x-0" : "-translate-x-full"
            )}
          >
-            {hasUnknownCompanyPrefix ? (
-              <NotFoundPage
-                scope="invalid_company_prefix"
-                requestedPrefix={companyPrefix ?? selectedCompany?.issuePrefix}
-              />
-            ) : (
-              <Outlet />
+            <div className="flex flex-1 min-h-0 overflow-hidden">
+              <CompanyRail />
+              {isInstanceSettingsRoute ? <InstanceSidebar /> : <Sidebar />}
+            </div>
+            <div className="border-t border-r border-border px-3 py-2 bg-background">
+              <div className="flex items-center gap-1">
+                <a
+                  href="https://docs.paperclip.ing/"
+                  target="_blank"
+                  rel="noopener noreferrer"
+                  className="flex items-center gap-2.5 px-3 py-2 text-[13px] font-medium transition-colors text-foreground/80 hover:bg-accent/50 hover:text-foreground flex-1 min-w-0"
+                >
+                  <BookOpen className="h-4 w-4 shrink-0" />
+                  <span className="truncate">Documentation</span>
+                </a>
+                <Button variant="ghost" size="icon-sm" className="text-muted-foreground shrink-0" asChild>
+                  <Link
+                    to="/instance/settings"
+                    aria-label="Instance settings"
+                    title="Instance settings"
+                    onClick={() => {
+                      if (isMobile) setSidebarOpen(false);
+                    }}
+                  >
+                    <Settings className="h-4 w-4" />
+                  </Link>
+                </Button>
+                <Button
+                  type="button"
+                  variant="ghost"
+                  size="icon-sm"
+                  className="text-muted-foreground shrink-0"
+                  onClick={toggleTheme}
+                  aria-label={`Switch to ${nextTheme} mode`}
+                  title={`Switch to ${nextTheme} mode`}
+                >
+                  {theme === "dark" ? <Sun className="h-4 w-4" /> : <Moon className="h-4 w-4" />}
+                </Button>
+              </div>
+            </div>
+          </div>
+        ) : (
+          <div className="flex h-full flex-col shrink-0">
+            <div className="flex flex-1 min-h-0">
+              <CompanyRail />
+              <div
+                className={cn(
+                  "overflow-hidden transition-[width] duration-100 ease-out",
+                  sidebarOpen ? "w-60" : "w-0"
+                )}
+              >
+                {isInstanceSettingsRoute ? <InstanceSidebar /> : <Sidebar />}
+              </div>
+            </div>
+            <div className="border-t border-r border-border px-3 py-2">
+              <div className="flex items-center gap-1">
+                <a
+                  href="https://docs.paperclip.ing/"
+                  target="_blank"
+                  rel="noopener noreferrer"
+                  className="flex items-center gap-2.5 px-3 py-2 text-[13px] font-medium transition-colors text-foreground/80 hover:bg-accent/50 hover:text-foreground flex-1 min-w-0"
+                >
+                  <BookOpen className="h-4 w-4 shrink-0" />
+                  <span className="truncate">Documentation</span>
+                </a>
+                <Button variant="ghost" size="icon-sm" className="text-muted-foreground shrink-0" asChild>
+                  <Link
+                    to="/instance/settings"
+                    aria-label="Instance settings"
+                    title="Instance settings"
+                    onClick={() => {
+                      if (isMobile) setSidebarOpen(false);
+                    }}
+                  >
+                    <Settings className="h-4 w-4" />
+                  </Link>
+                </Button>
+                <Button
+                  type="button"
+                  variant="ghost"
+                  size="icon-sm"
+                  className="text-muted-foreground shrink-0"
+                  onClick={toggleTheme}
+                  aria-label={`Switch to ${nextTheme} mode`}
+                  title={`Switch to ${nextTheme} mode`}
+                >
+                  {theme === "dark" ? <Sun className="h-4 w-4" /> : <Moon className="h-4 w-4" />}
+                </Button>
+              </div>
+            </div>
+          </div>
+        )}
+
+        {/* Main content */}
+        <div className={cn("flex min-w-0 flex-col", isMobile ? "w-full" : "h-full flex-1")}>
+          <div
+            className={cn(
+              isMobile && "sticky top-0 z-20 bg-background/95 backdrop-blur supports-[backdrop-filter]:bg-background/85",
            )}
-          </main>
-          <PropertiesPanel />
+          >
+            <BreadcrumbBar />
+          </div>
+          <div className={cn(isMobile ? "block" : "flex flex-1 min-h-0")}>
+            <main
+              id="main-content"
+              tabIndex={-1}
+              className={cn(
+                "flex-1 p-4 md:p-6",
+                isMobile ? "overflow-visible pb-[calc(5rem+env(safe-area-inset-bottom))]" : "overflow-auto",
+              )}
+            >
+              {hasUnknownCompanyPrefix ? (
+                <NotFoundPage
+                  scope="invalid_company_prefix"
+                  requestedPrefix={companyPrefix ?? selectedCompany?.issuePrefix}
+                />
+              ) : (
+                <Outlet />
+              )}
+            </main>
+            <PropertiesPanel />
+          </div>
        </div>
      </div>
      {isMobile && <MobileBottomNav visible={mobileNavVisible} />}
--- a/ui/src/components/WorktreeBanner.tsx
+++ b/ui/src/components/WorktreeBanner.tsx
@@ -0,0 +1,25 @@
+import { getWorktreeUiBranding } from "../lib/worktree-branding";
+
+/**
+ * Banner strip that shows the current worktree name, colored from the
+ * values returned by `getWorktreeUiBranding`.
+ *
+ * Renders nothing when `getWorktreeUiBranding` returns `null`.
+ */
+export function WorktreeBanner() {
+  const branding = getWorktreeUiBranding();
+  if (!branding) return null;
+
+  return (
+    <div
+      className="relative overflow-hidden border-b px-3 py-1.5 text-[11px] font-medium tracking-[0.2em] uppercase"
+      style={{
+        backgroundColor: branding.color,
+        color: branding.textColor,
+        // `textColor` is produced as `#rrggbb`, so the two hex digits appended
+        // below form `#rrggbbaa` values (text color at low opacity).
+        borderColor: `${branding.textColor}22`,
+        boxShadow: `inset 0 -1px 0 ${branding.textColor}18`,
+        backgroundImage: `linear-gradient(90deg, ${branding.textColor}14, transparent 28%, transparent 72%, ${branding.textColor}12), repeating-linear-gradient(135deg, transparent 0 10px, ${branding.textColor}08 10px 20px)`,
+      }}
+    >
+      <div className="flex items-center gap-2 overflow-hidden whitespace-nowrap">
+        <span className="shrink-0 opacity-70">Worktree</span>
+        <span className="h-1.5 w-1.5 shrink-0 rounded-full bg-current opacity-70" aria-hidden="true" />
+        <span className="truncate font-semibold tracking-[0.12em]">{branding.name}</span>
+      </div>
+    </div>
+  );
+}
--- a/ui/src/components/agent-config-primitives.tsx
+++ b/ui/src/components/agent-config-primitives.tsx
@@ -26,7 +26,7 @@ export const help: Record<string, string> = {
  capabilities: "Describes what this agent can do. Shown in the org chart and used for task routing.",
  adapterType: "How this agent runs: local CLI (Claude/Codex/OpenCode), OpenClaw Gateway, spawned process, or generic HTTP webhook.",
  cwd: "Default working directory fallback for local adapters. Use an absolute path on the machine running Paperclip.",
-  promptTemplate: "The prompt sent to the agent on each heartbeat. Supports {{ agent.id }}, {{ agent.name }}, {{ agent.role }} variables.",
+  promptTemplate: "Sent on every heartbeat. Keep this small and dynamic. Use it for current-task framing, not large static instructions. Supports {{ agent.id }}, {{ agent.name }}, {{ agent.role }} and other template variables.",
  model: "Override the default model used by the adapter.",
  thinkingEffort: "Control model reasoning depth. Supported values vary by adapter/model.",
  chrome: "Enable Claude's Chrome integration by passing --chrome.",
@@ -44,7 +44,7 @@ export const help: Record<string, string> = {
  args: "Command-line arguments, comma-separated.",
  extraArgs: "Extra CLI arguments for local adapters, comma-separated.",
  envVars: "Environment variables injected into the adapter process. Use plain values or secret references.",
-  bootstrapPrompt: "Optional prompt prepended on the first run to bootstrap the agent's environment or habits.",
+  bootstrapPrompt: "Only sent when Paperclip starts a fresh session. Use this for stable setup guidance that should not be repeated on every heartbeat.",
  payloadTemplateJson: "Optional JSON merged into remote adapter request payloads before Paperclip adds its standard wake and workspace fields.",
  webhookUrl: "The URL that receives POST requests when the agent is invoked.",
  heartbeatInterval: "Run this agent automatically on a timer. Useful for periodic tasks like checking for new work.",
--- a/ui/src/hooks/useCompanyPageMemory.test.ts
+++ b/ui/src/hooks/useCompanyPageMemory.test.ts
@@ -0,0 +1,71 @@
+import { describe, expect, it } from "vitest";
+import {
+  getRememberedPathOwnerCompanyId,
+  sanitizeRememberedPathForCompany,
+} from "../lib/company-page-memory";
+
+// Minimal company fixtures: the helpers under test only require `id` and
+// `issuePrefix` on each company.
+const companies = [
+  { id: "for", issuePrefix: "FOR" },
+  { id: "pap", issuePrefix: "PAP" },
+];
+
+// Covers how the owning company is chosen for a pathname: resolved from the
+// route prefix when one is present, `null` when that prefix matches no known
+// company, and the fallback id for routes without a company prefix.
+describe("getRememberedPathOwnerCompanyId", () => {
+  it("uses the route company instead of stale selected-company state for prefixed routes", () => {
+    expect(
+      getRememberedPathOwnerCompanyId({
+        companies,
+        pathname: "/FOR/issues/FOR-1",
+        fallbackCompanyId: "pap",
+      }),
+    ).toBe("for");
+  });
+
+  it("skips saving when a prefixed route cannot yet be resolved to a known company", () => {
+    expect(
+      getRememberedPathOwnerCompanyId({
+        companies: [],
+        pathname: "/FOR/issues/FOR-1",
+        fallbackCompanyId: "pap",
+      }),
+    ).toBeNull();
+  });
+
+  it("falls back to the previous company for unprefixed board routes", () => {
+    expect(
+      getRememberedPathOwnerCompanyId({
+        companies,
+        pathname: "/dashboard",
+        fallbackCompanyId: "pap",
+      }),
+    ).toBe("pap");
+  });
+});
+
+// Covers which remembered paths are kept for the target company and when
+// `/dashboard` is substituted instead.
+describe("sanitizeRememberedPathForCompany", () => {
+  it("keeps remembered issue paths that belong to the target company", () => {
+    expect(
+      sanitizeRememberedPathForCompany({
+        path: "/issues/PAP-12",
+        companyPrefix: "PAP",
+      }),
+    ).toBe("/issues/PAP-12");
+  });
+
+  it("falls back to dashboard for remembered issue identifiers from another company", () => {
+    expect(
+      sanitizeRememberedPathForCompany({
+        path: "/issues/FOR-1",
+        companyPrefix: "PAP",
+      }),
+    ).toBe("/dashboard");
+  });
+
+  it("falls back to dashboard when no remembered path exists", () => {
+    expect(
+      sanitizeRememberedPathForCompany({
+        path: null,
+        companyPrefix: "PAP",
+      }),
+    ).toBe("/dashboard");
+  });
+});
--- a/ui/src/hooks/useCompanyPageMemory.ts
+++ b/ui/src/hooks/useCompanyPageMemory.ts
@@ -1,10 +1,14 @@
-import { useEffect, useRef } from "react";
+import { useEffect, useMemo, useRef } from "react";
 import { useLocation, useNavigate } from "@/lib/router";
 import { useCompany } from "../context/CompanyContext";
 import { toCompanyRelativePath } from "../lib/company-routes";
+import {
+  getRememberedPathOwnerCompanyId,
+  isRememberableCompanyPath,
+  sanitizeRememberedPathForCompany,
+} from "../lib/company-page-memory";

 const STORAGE_KEY = "paperclip.companyPaths";
-const GLOBAL_SEGMENTS = new Set(["auth", "invite", "board-claim", "docs"]);

 function getCompanyPaths(): Record<string, string> {
  try {
@@ -22,36 +26,36 @@ function saveCompanyPath(companyId: string, path: string) {
  localStorage.setItem(STORAGE_KEY, JSON.stringify(paths));
 }

-function isRememberableCompanyPath(path: string): boolean {
-  const pathname = path.split("?")[0] ?? "";
-  const segments = pathname.split("/").filter(Boolean);
-  if (segments.length === 0) return true;
-  const [root] = segments;
-  if (GLOBAL_SEGMENTS.has(root!)) return false;
-  return true;
-}
-
 /**
 * Remembers the last visited page per company and navigates to it on company switch.
 * Falls back to /dashboard if no page was previously visited for a company.
 */
 export function useCompanyPageMemory() {
-  const { selectedCompanyId, selectedCompany, selectionSource } = useCompany();
+  const { companies, selectedCompanyId, selectedCompany, selectionSource } = useCompany();
  const location = useLocation();
  const navigate = useNavigate();
  const prevCompanyId = useRef<string | null>(selectedCompanyId);
+  const rememberedPathOwnerCompanyId = useMemo(
+    () =>
+      getRememberedPathOwnerCompanyId({
+        companies,
+        pathname: location.pathname,
+        fallbackCompanyId: prevCompanyId.current,
+      }),
+    [companies, location.pathname],
+  );

  // Save current path for current company on every location change.
  // Uses prevCompanyId ref so we save under the correct company even
  // during the render where selectedCompanyId has already changed.
  const fullPath = location.pathname + location.search;
  useEffect(() => {
-    const companyId = prevCompanyId.current;
+    const companyId = rememberedPathOwnerCompanyId;
    const relativePath = toCompanyRelativePath(fullPath);
    if (companyId && isRememberableCompanyPath(relativePath)) {
      saveCompanyPath(companyId, relativePath);
    }
-  }, [fullPath]);
+  }, [fullPath, rememberedPathOwnerCompanyId]);

  // Navigate to saved path when company changes
  useEffect(() => {
@@ -63,9 +67,10 @@ export function useCompanyPageMemory() {
    ) {
      if (selectionSource !== "route_sync" && selectedCompany) {
        const paths = getCompanyPaths();
-        const savedPath = paths[selectedCompanyId];
-        const relativePath = savedPath ? toCompanyRelativePath(savedPath) : "/dashboard";
-        const targetPath = isRememberableCompanyPath(relativePath) ? relativePath : "/dashboard";
+        const targetPath = sanitizeRememberedPathForCompany({
+          path: paths[selectedCompanyId],
+          companyPrefix: selectedCompany.issuePrefix,
+        });
        navigate(`/${selectedCompany.issuePrefix}${targetPath}`, { replace: true });
      }
    }
--- a/ui/src/lib/company-page-memory.ts
+++ b/ui/src/lib/company-page-memory.ts
@@ -0,0 +1,65 @@
+import {
+  extractCompanyPrefixFromPath,
+  normalizeCompanyPrefix,
+  toCompanyRelativePath,
+} from "./company-routes";
+
+// First path segments that `isRememberableCompanyPath` rejects, so paths
+// under them are never treated as rememberable.
+const GLOBAL_SEGMENTS = new Set(["auth", "invite", "board-claim", "docs"]);
+
+/**
+ * Reports whether a path is eligible to be remembered.
+ *
+ * Everything from the first `?` onward is ignored. A path with no segments
+ * (the root) is rememberable; a path whose first segment is listed in
+ * `GLOBAL_SEGMENTS` is not; every other path is.
+ *
+ * @param path - Path to check, optionally followed by a query string.
+ * @returns `true` when the path may be remembered, `false` otherwise.
+ */
+export function isRememberableCompanyPath(path: string): boolean {
+  const pathname = path.split("?")[0] ?? "";
+  const segments = pathname.split("/").filter(Boolean);
+  if (segments.length === 0) return true;
+  const [root] = segments;
+  if (GLOBAL_SEGMENTS.has(root!)) return false;
+  return true;
+}
+
+/**
+ * Finds the first company whose issue prefix matches `companyPrefix`.
+ *
+ * Both the requested prefix and each company's `issuePrefix` are passed
+ * through `normalizeCompanyPrefix` before they are compared.
+ *
+ * @returns The matching company, or `null` when none matches.
+ */
+function findCompanyByPrefix<T extends { id: string; issuePrefix: string }>(params: {
+  companies: T[];
+  companyPrefix: string;
+}): T | null {
+  const normalizedPrefix = normalizeCompanyPrefix(params.companyPrefix);
+  return params.companies.find((company) => normalizeCompanyPrefix(company.issuePrefix) === normalizedPrefix) ?? null;
+}
+
+/**
+ * Determines which company a pathname should be remembered under.
+ *
+ * @param params.companies - Known companies used to resolve a route prefix.
+ * @param params.pathname - Pathname to inspect for a company prefix.
+ * @param params.fallbackCompanyId - Id returned when the pathname carries no
+ *   company prefix.
+ * @returns `fallbackCompanyId` when `extractCompanyPrefixFromPath` yields no
+ *   prefix; otherwise the id of the company matching that prefix, or `null`
+ *   when the prefix matches none of `companies`.
+ */
+export function getRememberedPathOwnerCompanyId<T extends { id: string; issuePrefix: string }>(params: {
+  companies: T[];
+  pathname: string;
+  fallbackCompanyId: string | null;
+}): string | null {
+  const routeCompanyPrefix = extractCompanyPrefixFromPath(params.pathname);
+  if (!routeCompanyPrefix) {
+    return params.fallbackCompanyId;
+  }
+
+  // A prefix is present: never fall back, so an unresolved prefix yields null.
+  return findCompanyByPrefix({
+    companies: params.companies,
+    companyPrefix: routeCompanyPrefix,
+  })?.id ?? null;
+}
+
+/**
+ * Turns a remembered path into one that is safe to open for the target company.
+ *
+ * A non-empty `path` is first passed through `toCompanyRelativePath`; a
+ * missing or empty one becomes `/dashboard`.
+ *
+ * @param params.path - Previously remembered path, if any.
+ * @param params.companyPrefix - Issue prefix of the company being switched to.
+ * @returns The relative path, or `/dashboard` when the path is not
+ *   rememberable or is an `/issues/<letters>-<digits>` path whose letter
+ *   prefix differs (after normalization) from `companyPrefix`.
+ */
+export function sanitizeRememberedPathForCompany(params: {
+  path: string | null | undefined;
+  companyPrefix: string;
+}): string {
+  const relativePath = params.path ? toCompanyRelativePath(params.path) : "/dashboard";
+  if (!isRememberableCompanyPath(relativePath)) {
+    return "/dashboard";
+  }
+
+  const pathname = relativePath.split("?")[0] ?? "";
+  const segments = pathname.split("/").filter(Boolean);
+  const [root, entityId] = segments;
+  if (root === "issues" && entityId) {
+    // Only ids shaped like LETTERS-DIGITS are checked; any other second
+    // segment is left untouched.
+    const identifierMatch = /^([A-Za-z]+)-\d+$/.exec(entityId);
+    if (
+      identifierMatch &&
+      normalizeCompanyPrefix(identifierMatch[1] ?? "") !== normalizeCompanyPrefix(params.companyPrefix)
+    ) {
+      return "/dashboard";
+    }
+  }
+
+  return relativePath;
+}
--- a/ui/src/lib/worktree-branding.ts
+++ b/ui/src/lib/worktree-branding.ts
@@ -0,0 +1,65 @@
+/**
+ * Branding values for the worktree UI, as returned by `getWorktreeUiBranding`.
+ *
+ * When built by `getWorktreeUiBranding`, `color` and `textColor` are
+ * `#rrggbb` strings.
+ */
+export type WorktreeUiBranding = {
+  enabled: true;
+  name: string;
+  color: string;
+  textColor: string;
+};
+
+/**
+ * Reads the trimmed `content` attribute of the first `<meta>` element with
+ * the given `name`.
+ *
+ * @param name - Value of the meta element's `name` attribute. It is
+ *   interpolated directly into the selector, so it must not contain `"`.
+ * @returns The trimmed content, or `null` when `document` is undefined, no
+ *   element matches, the attribute is absent, or the trimmed content is empty.
+ */
+function readMetaContent(name: string): string | null {
+  // Guard for environments where no `document` global exists.
+  if (typeof document === "undefined") return null;
+  const element = document.querySelector(`meta[name="${name}"]`);
+  const content = element?.getAttribute("content")?.trim();
+  return content ? content : null;
+}
+
+/**
+ * Normalizes a 3- or 6-digit hex color to lowercase `#rrggbb` form.
+ *
+ * A leading `#` is optional. Three-digit values are expanded by doubling each
+ * digit, so `abc` becomes `#aabbcc`. The input is not trimmed here.
+ *
+ * @param value - Candidate color string, or `null`.
+ * @returns The normalized color, or `null` for `null`, an empty string, or
+ *   anything that is not exactly 3 or 6 hex digits after the optional `#`.
+ */
+function normalizeHexColor(value: string | null): string | null {
+  if (!value) return null;
+  const hex = value.startsWith("#") ? value.slice(1) : value;
+  if (/^[0-9a-fA-F]{3}$/.test(hex)) {
+    return `#${hex.split("").map((char) => `${char}${char}`).join("").toLowerCase()}`;
+  }
+  if (/^[0-9a-fA-F]{6}$/.test(hex)) {
+    return `#${hex.toLowerCase()}`;
+  }
+  return null;
+}
+
+/**
+ * Splits a hex color into red, green, and blue channel values (0-255 each).
+ *
+ * @param color - Hex color in any form accepted by `normalizeHexColor`.
+ * @returns The three channels. Input that fails normalization is treated as
+ *   `#000000`, so every channel is 0.
+ */
+function hexToRgb(color: string): { r: number; g: number; b: number } {
+  const normalized = normalizeHexColor(color) ?? "#000000";
+  return {
+    r: Number.parseInt(normalized.slice(1, 3), 16),
+    g: Number.parseInt(normalized.slice(3, 5), 16),
+    b: Number.parseInt(normalized.slice(5, 7), 16),
+  };
+}
+
+/**
+ * Converts one color channel into the term used by the luminance sum in
+ * `pickReadableTextColor`.
+ *
+ * The channel is divided by 255, then mapped piecewise: values at or below
+ * 0.03928 are divided by 12.92, larger values become
+ * `((v + 0.055) / 1.055) ** 2.4`.
+ * NOTE(review): these constants look like the WCAG 2.x relative-luminance
+ * formula — confirm that is the intended reference.
+ *
+ * @param value - Channel value; callers in this module pass 0-255 integers.
+ */
+function relativeLuminanceChannel(value: number): number {
+  const normalized = value / 255;
+  return normalized <= 0.03928 ? normalized / 12.92 : ((normalized + 0.055) / 1.055) ** 2.4;
+}
+
+/**
+ * Chooses a light or dark text color for the given background color.
+ *
+ * Computes a weighted luminance `L` of the background, then compares
+ * `1.05 / (L + 0.05)` (the ratio against luminance 1, i.e. white) with
+ * `(L + 0.05) / 0.05` (the ratio against luminance 0, i.e. black).
+ *
+ * @param background - Hex background color; input that `hexToRgb` cannot
+ *   normalize is treated as black.
+ * @returns `#f8fafc` when the white ratio is at least the black ratio (ties
+ *   pick the light color), otherwise `#111827`.
+ */
+function pickReadableTextColor(background: string): string {
+  const { r, g, b } = hexToRgb(background);
+  const luminance =
+    (0.2126 * relativeLuminanceChannel(r)) +
+    (0.7152 * relativeLuminanceChannel(g)) +
+    (0.0722 * relativeLuminanceChannel(b));
+  const whiteContrast = 1.05 / (luminance + 0.05);
+  const blackContrast = (luminance + 0.05) / 0.05;
+  return whiteContrast >= blackContrast ? "#f8fafc" : "#111827";
+}
+
+/**
+ * Builds the worktree branding from `<meta>` tags in the current document.
+ *
+ * Reads `paperclip-worktree-enabled`, `paperclip-worktree-name`,
+ * `paperclip-worktree-color`, and the optional
+ * `paperclip-worktree-text-color`.
+ *
+ * @returns The branding, or `null` when the enabled tag is not exactly
+ *   `"true"`, the name is missing or empty, or the color is not a valid
+ *   3- or 6-digit hex color.
+ */
+export function getWorktreeUiBranding(): WorktreeUiBranding | null {
+  if (readMetaContent("paperclip-worktree-enabled") !== "true") return null;
+
+  const name = readMetaContent("paperclip-worktree-name");
+  const color = normalizeHexColor(readMetaContent("paperclip-worktree-color"));
+  if (!name || !color) return null;
+
+  return {
+    enabled: true,
+    name,
+    color,
+    // An explicit, valid text color wins; otherwise one is derived from the
+    // background color.
+    textColor: normalizeHexColor(readMetaContent("paperclip-worktree-text-color")) ?? pickReadableTextColor(color),
+  };
+}