diff --git a/.github/workflows/pr-policy.yml b/.github/workflows/pr-policy.yml index a8042120..16953380 100644 --- a/.github/workflows/pr-policy.yml +++ b/.github/workflows/pr-policy.yml @@ -13,8 +13,6 @@ jobs: policy: runs-on: ubuntu-latest timeout-minutes: 10 - permissions: - pull-requests: read steps: - name: Checkout repository @@ -33,38 +31,19 @@ jobs: with: node-version: 20 - - name: Enforce lockfile policy when manifests change - env: - GH_TOKEN: ${{ github.token }} + - name: Block manual lockfile edits + if: github.head_ref != 'chore/refresh-lockfile' run: | - changed="$(gh api "repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files" --paginate --jq '.[].filename')" - manifest_pattern='(^|/)package\.json$|^pnpm-workspace\.yaml$|^\.npmrc$|^pnpmfile\.(cjs|js|mjs)$' - - manifest_changed=false - lockfile_changed=false - - if printf '%s\n' "$changed" | grep -Eq "$manifest_pattern"; then - manifest_changed=true - fi - + changed="$(git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}")" if printf '%s\n' "$changed" | grep -qx 'pnpm-lock.yaml'; then - lockfile_changed=true - fi - - if [ "$lockfile_changed" = true ] && [ "$manifest_changed" != true ]; then - echo "pnpm-lock.yaml changed without a dependency manifest change." >&2 + echo "Do not commit pnpm-lock.yaml in pull requests. CI owns lockfile updates." exit 1 fi - if [ "$manifest_changed" = true ]; then + - name: Validate dependency resolution when manifests change + run: | + changed="$(git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}")" + manifest_pattern='(^|/)package\.json$|^pnpm-workspace\.yaml$|^\.npmrc$|^pnpmfile\.(cjs|js|mjs)$' + if printf '%s\n' "$changed" | grep -Eq "$manifest_pattern"; then pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile - - if ! 
git diff --quiet -- pnpm-lock.yaml; then - if [ "${{ github.event.pull_request.head.repo.full_name }}" = "${{ github.repository }}" ]; then - echo "pnpm-lock.yaml is stale for this PR. Wait for the Refresh Lockfile workflow to push the bot commit, then rerun checks." >&2 - else - echo "pnpm-lock.yaml is stale for this fork PR. Run pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile and commit pnpm-lock.yaml." >&2 - fi - exit 1 - fi fi diff --git a/.github/workflows/pr-verify.yml b/.github/workflows/pr-verify.yml index 60d2f075..e84e448a 100644 --- a/.github/workflows/pr-verify.yml +++ b/.github/workflows/pr-verify.yml @@ -30,7 +30,7 @@ jobs: cache: pnpm - name: Install dependencies - run: pnpm install --frozen-lockfile + run: pnpm install --no-frozen-lockfile - name: Typecheck run: pnpm -r typecheck diff --git a/.github/workflows/refresh-lockfile-pr.yml b/.github/workflows/refresh-lockfile-pr.yml deleted file mode 100644 index 7d69588b..00000000 --- a/.github/workflows/refresh-lockfile-pr.yml +++ /dev/null @@ -1,111 +0,0 @@ -name: Refresh Lockfile - -on: - pull_request: - branches: - - master - types: - - opened - - synchronize - - reopened - - ready_for_review - -concurrency: - group: refresh-lockfile-pr-${{ github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - refresh: - runs-on: ubuntu-latest - timeout-minutes: 10 - permissions: - contents: write - pull-requests: read - - steps: - - name: Detect dependency manifest changes - id: changes - env: - GH_TOKEN: ${{ github.token }} - run: | - changed="$(gh api "repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files" --paginate --jq '.[].filename')" - manifest_pattern='(^|/)package\.json$|^pnpm-workspace\.yaml$|^\.npmrc$|^pnpmfile\.(cjs|js|mjs)$' - - if printf '%s\n' "$changed" | grep -Eq "$manifest_pattern"; then - echo "manifest_changed=true" >> "$GITHUB_OUTPUT" - else - echo "manifest_changed=false" >> "$GITHUB_OUTPUT" - fi - - if [ "${{ 
github.event.pull_request.head.repo.full_name }}" = "${{ github.repository }}" ]; then - echo "same_repo=true" >> "$GITHUB_OUTPUT" - else - echo "same_repo=false" >> "$GITHUB_OUTPUT" - fi - - - name: Checkout pull request head - if: steps.changes.outputs.manifest_changed == 'true' - uses: actions/checkout@v4 - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - fetch-depth: 0 - - - name: Setup pnpm - if: steps.changes.outputs.manifest_changed == 'true' - uses: pnpm/action-setup@v4 - with: - version: 9.15.4 - run_install: false - - - name: Setup Node.js - if: steps.changes.outputs.manifest_changed == 'true' - uses: actions/setup-node@v4 - with: - node-version: 20 - cache: pnpm - - - name: Refresh pnpm lockfile - if: steps.changes.outputs.manifest_changed == 'true' - run: pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile - - - name: Fail on unexpected file changes - if: steps.changes.outputs.manifest_changed == 'true' - run: | - changed="$(git status --porcelain)" - if [ -z "$changed" ]; then - echo "Lockfile is already up to date." - exit 0 - fi - if printf '%s\n' "$changed" | grep -Fvq ' pnpm-lock.yaml'; then - echo "Unexpected files changed during lockfile refresh:" - echo "$changed" - exit 1 - fi - - - name: Commit refreshed lockfile to same-repo PR branch - if: steps.changes.outputs.manifest_changed == 'true' && steps.changes.outputs.same_repo == 'true' - run: | - if git diff --quiet -- pnpm-lock.yaml; then - echo "Lockfile unchanged, nothing to do." 
- exit 0 - fi - - git config user.name "lockfile-bot" - git config user.email "lockfile-bot@users.noreply.github.com" - git add pnpm-lock.yaml - git commit -m "chore(lockfile): refresh pnpm-lock.yaml" - git push origin "HEAD:${{ github.event.pull_request.head.ref }}" - - - name: Fail fork PRs that need a lockfile refresh - if: steps.changes.outputs.manifest_changed == 'true' && steps.changes.outputs.same_repo != 'true' - run: | - if git diff --quiet -- pnpm-lock.yaml; then - echo "Lockfile unchanged, nothing to do." - exit 0 - fi - - echo "This fork PR changes dependency manifests and requires a refreshed pnpm-lock.yaml." >&2 - echo "Run: pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile" >&2 - echo "Then commit pnpm-lock.yaml to the PR branch." >&2 - exit 1 diff --git a/.github/workflows/refresh-lockfile.yml b/.github/workflows/refresh-lockfile.yml new file mode 100644 index 00000000..a879e5bc --- /dev/null +++ b/.github/workflows/refresh-lockfile.yml @@ -0,0 +1,81 @@ +name: Refresh Lockfile + +on: + push: + branches: + - master + workflow_dispatch: + +concurrency: + group: refresh-lockfile-master + cancel-in-progress: false + +jobs: + refresh: + runs-on: ubuntu-latest + timeout-minutes: 10 + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9.15.4 + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: pnpm + + - name: Refresh pnpm lockfile + run: pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile + + - name: Fail on unexpected file changes + run: | + changed="$(git status --porcelain)" + if [ -z "$changed" ]; then + echo "Lockfile is already up to date." 
+ exit 0 + fi + if printf '%s\n' "$changed" | grep -Fvq ' pnpm-lock.yaml'; then + echo "Unexpected files changed during lockfile refresh:" + echo "$changed" + exit 1 + fi + + - name: Create or update pull request + env: + GH_TOKEN: ${{ github.token }} + run: | + if git diff --quiet -- pnpm-lock.yaml; then + echo "Lockfile unchanged, nothing to do." + exit 0 + fi + + BRANCH="chore/refresh-lockfile" + git config user.name "lockfile-bot" + git config user.email "lockfile-bot@users.noreply.github.com" + + git checkout -B "$BRANCH" + git add pnpm-lock.yaml + git commit -m "chore(lockfile): refresh pnpm-lock.yaml" + git push --force origin "$BRANCH" + + # Create PR if one doesn't already exist + existing=$(gh pr list --head "$BRANCH" --json number --jq '.[0].number') + if [ -z "$existing" ]; then + gh pr create \ + --head "$BRANCH" \ + --title "chore(lockfile): refresh pnpm-lock.yaml" \ + --body "Auto-generated lockfile refresh after dependencies changed on master. This PR only updates pnpm-lock.yaml." + echo "Created new PR." + else + echo "PR #$existing already exists, branch updated via force push." + fi diff --git a/cli/src/commands/worktree.ts b/cli/src/commands/worktree.ts index 4f0ed887..7311793b 100644 --- a/cli/src/commands/worktree.ts +++ b/cli/src/commands/worktree.ts @@ -514,6 +514,7 @@ async function ensureEmbeddedPostgres(dataDir: string, preferredPort: number): P password: "paperclip", port, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: () => {}, onError: () => {}, }); diff --git a/doc/DEVELOPING.md b/doc/DEVELOPING.md index b73a53f1..1ca1409b 100644 --- a/doc/DEVELOPING.md +++ b/doc/DEVELOPING.md @@ -19,9 +19,9 @@ Current implementation status: GitHub Actions owns `pnpm-lock.yaml`. -- Same-repo pull requests that change dependency manifests are auto-refreshed by GitHub Actions before merge. -- Fork pull requests that change dependency manifests must include the refreshed `pnpm-lock.yaml`. 
-- Pull request CI validates lockfile freshness when manifests change and verifies with `--frozen-lockfile`. +- Do not commit `pnpm-lock.yaml` in pull requests. +- Pull request CI validates dependency resolution when manifests change. +- Pushes to `master` regenerate `pnpm-lock.yaml` with `pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile`, open or update a pull request from the `chore/refresh-lockfile` branch if the lockfile changed, and then run verification with `--frozen-lockfile`. ## Start Dev diff --git a/packages/adapters/opencode-local/src/server/models.ts b/packages/adapters/opencode-local/src/server/models.ts index dd2eb2c6..a4d1a46d 100644 --- a/packages/adapters/opencode-local/src/server/models.ts +++ b/packages/adapters/opencode-local/src/server/models.ts @@ -7,6 +7,7 @@ import { } from "@paperclipai/adapter-utils/server-utils"; const MODELS_CACHE_TTL_MS = 60_000; +const MODELS_DISCOVERY_TIMEOUT_MS = 20_000; function resolveOpenCodeCommand(input: unknown): string { const envOverride = @@ -115,14 +116,14 @@ export async function discoverOpenCodeModels(input: { { cwd, env: runtimeEnv, - timeoutSec: 20, + timeoutSec: MODELS_DISCOVERY_TIMEOUT_MS / 1000, graceSec: 3, onLog: async () => {}, }, ); if (result.timedOut) { - throw new Error("`opencode models` timed out."); + throw new Error(`\`opencode models\` timed out after ${MODELS_DISCOVERY_TIMEOUT_MS / 1000}s.`); } if ((result.exitCode ?? 
1) !== 0) { const detail = firstNonEmptyLine(result.stderr) || firstNonEmptyLine(result.stdout); diff --git a/packages/db/src/client.ts b/packages/db/src/client.ts index c4275dc4..83b4aa78 100644 --- a/packages/db/src/client.ts +++ b/packages/db/src/client.ts @@ -730,7 +730,7 @@ export async function ensurePostgresDatabase( `; if (existing.length > 0) return "exists"; - await sql.unsafe(`create database "${databaseName}"`); + await sql.unsafe(`create database "${databaseName}" encoding 'UTF8' lc_collate 'C' lc_ctype 'C' template template0`); return "created"; } finally { await sql.end(); diff --git a/packages/db/src/migration-runtime.ts b/packages/db/src/migration-runtime.ts index bc90b762..10b7b9b1 100644 --- a/packages/db/src/migration-runtime.ts +++ b/packages/db/src/migration-runtime.ts @@ -96,6 +96,7 @@ async function ensureEmbeddedPostgresConnection( password: "paperclip", port: preferredPort, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: () => {}, onError: () => {}, }); diff --git a/server/package.json b/server/package.json index 1dd9b073..cd30cf13 100644 --- a/server/package.json +++ b/server/package.json @@ -38,9 +38,9 @@ "@paperclipai/adapter-codex-local": "workspace:*", "@paperclipai/adapter-cursor-local": "workspace:*", "@paperclipai/adapter-gemini-local": "workspace:*", + "@paperclipai/adapter-openclaw-gateway": "workspace:*", "@paperclipai/adapter-opencode-local": "workspace:*", "@paperclipai/adapter-pi-local": "workspace:*", - "@paperclipai/adapter-openclaw-gateway": "workspace:*", "@paperclipai/adapter-utils": "workspace:*", "@paperclipai/db": "workspace:*", "@paperclipai/shared": "workspace:*", diff --git a/server/src/index.ts b/server/src/index.ts index c220df92..27b559eb 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -53,6 +53,7 @@ type EmbeddedPostgresCtor = new (opts: { password: string; port: number; persistent: boolean; + initdbFlags?: string[]; onLog?: (message: unknown) => void; onError?: 
(message: unknown) => void; }) => EmbeddedPostgresInstance; @@ -334,6 +335,7 @@ export async function startServer(): Promise { password: "paperclip", port, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: appendEmbeddedPostgresLog, onError: appendEmbeddedPostgresLog, }); @@ -512,11 +514,14 @@ export async function startServer(): Promise { if (config.heartbeatSchedulerEnabled) { const heartbeat = heartbeatService(db as any); - // Reap orphaned runs at startup (no threshold -- runningProcesses is empty) - void heartbeat.reapOrphanedRuns().catch((err) => { - logger.error({ err }, "startup reap of orphaned heartbeat runs failed"); - }); - + // Reap orphaned running runs at startup while in-memory execution state is empty, + // then resume any persisted queued runs that were waiting on the previous process. + void heartbeat + .reapOrphanedRuns() + .then(() => heartbeat.resumeQueuedRuns()) + .catch((err) => { + logger.error({ err }, "startup heartbeat recovery failed"); + }); setInterval(() => { void heartbeat .tickTimers(new Date()) @@ -529,11 +534,13 @@ export async function startServer(): Promise { logger.error({ err }, "heartbeat timer tick failed"); }); - // Periodically reap orphaned runs (5-min staleness threshold) + // Periodically reap orphaned runs (5-min staleness threshold) and make sure + // persisted queued work is still being driven forward. 
void heartbeat .reapOrphanedRuns({ staleThresholdMs: 5 * 60 * 1000 }) + .then(() => heartbeat.resumeQueuedRuns()) .catch((err) => { - logger.error({ err }, "periodic reap of orphaned heartbeat runs failed"); + logger.error({ err }, "periodic heartbeat recovery failed"); }); }, config.heartbeatSchedulerIntervalMs); } diff --git a/server/src/services/heartbeat.ts b/server/src/services/heartbeat.ts index e782bc25..f0665c9a 100644 --- a/server/src/services/heartbeat.ts +++ b/server/src/services/heartbeat.ts @@ -455,6 +455,7 @@ export function heartbeatService(db: Db) { const runLogStore = getRunLogStore(); const secretsSvc = secretService(db); const issuesSvc = issueService(db); + const activeRunExecutions = new Set(); async function getAgent(agentId: string) { return db @@ -959,7 +960,7 @@ export function heartbeatService(db: Db) { const reaped: string[] = []; for (const run of activeRuns) { - if (runningProcesses.has(run.id)) continue; + if (runningProcesses.has(run.id) || activeRunExecutions.has(run.id)) continue; // Apply staleness threshold to avoid false positives if (staleThresholdMs > 0) { @@ -998,6 +999,18 @@ export function heartbeatService(db: Db) { return { reaped: reaped.length, runIds: reaped }; } + async function resumeQueuedRuns() { + const queuedRuns = await db + .select({ agentId: heartbeatRuns.agentId }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.status, "queued")); + + const agentIds = [...new Set(queuedRuns.map((r) => r.agentId))]; + for (const agentId of agentIds) { + await startNextQueuedRunForAgent(agentId); + } + } + async function updateRuntimeState( agent: typeof agents.$inferSelect, run: typeof heartbeatRuns.$inferSelect, @@ -1089,6 +1102,9 @@ export function heartbeatService(db: Db) { run = claimed; } + activeRunExecutions.add(run.id); + + try { const agent = await getAgent(run.agentId); if (!agent) { await setRunStatus(runId, "failed", { @@ -1676,10 +1692,41 @@ export function heartbeatService(db: Db) { } await 
finalizeAgentStatus(agent.id, "failed"); - } finally { - await releaseRuntimeServicesForRun(run.id); - await startNextQueuedRunForAgent(agent.id); } + } catch (outerErr) { + // Setup code before adapter.execute threw (e.g. ensureRuntimeState, resolveWorkspaceForRun). + // The inner catch did not fire, so we must record the failure here. + const message = outerErr instanceof Error ? outerErr.message : "Unknown setup failure"; + logger.error({ err: outerErr, runId }, "heartbeat execution setup failed"); + await setRunStatus(runId, "failed", { + error: message, + errorCode: "adapter_failed", + finishedAt: new Date(), + }).catch(() => undefined); + await setWakeupStatus(run.wakeupRequestId, "failed", { + finishedAt: new Date(), + error: message, + }).catch(() => undefined); + const failedRun = await getRun(runId).catch(() => null); + if (failedRun) { + // Emit a run-log event so the failure is visible in the run timeline, + // consistent with what the inner catch block does for adapter failures. + await appendRunEvent(failedRun, 1, { + eventType: "error", + stream: "system", + level: "error", + message, + }).catch(() => undefined); + await releaseIssueExecutionAndPromote(failedRun).catch(() => undefined); + } + // Ensure the agent is not left stuck in "running" if the inner catch handler's + // DB calls threw (e.g. a transient DB error in finalizeAgentStatus). + await finalizeAgentStatus(run.agentId, "failed").catch(() => undefined); + } finally { + await releaseRuntimeServicesForRun(run.id).catch(() => undefined); + activeRunExecutions.delete(run.id); + await startNextQueuedRunForAgent(run.agentId); + } } async function releaseIssueExecutionAndPromote(run: typeof heartbeatRuns.$inferSelect) { @@ -2425,6 +2472,8 @@ export function heartbeatService(db: Db) { reapOrphanedRuns, + resumeQueuedRuns, + tickTimers: async (now = new Date()) => { const allAgents = await db.select().from(agents); let checked = 0;