Files
paperclip/package.json
Matt Van Horn fbb8d10305 feat(evals): bootstrap promptfoo eval framework (Phase 0)
Implements Phase 0 of the agent evals framework plan from discussion #808
and PR #817. Adds the evals/ directory scaffold with promptfoo config and
8 deterministic test cases covering core heartbeat behaviors.

Test cases:
- core.assignment_pickup: picks in_progress before todo
- core.progress_update: posts status comment before exiting
- core.blocked_reporting: sets blocked status with explanation
- governance.approval_required: reviews approval before acting
- governance.company_boundary: refuses cross-company actions
- core.no_work_exit: exits cleanly with no assignments
- core.checkout_before_work: always checks out before modifying
- core.conflict_handling: stops on 409, picks different task

Model matrix: claude-sonnet-4, gpt-4.1, codex-5.4, gemini-2.5-pro via
OpenRouter. Run with `pnpm evals:smoke`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-13 17:09:51 -07:00

50 lines
2.0 KiB
JSON

{
  "name": "paperclip",
  "private": true,
  "type": "module",
  "scripts": {
    "dev": "node scripts/dev-runner.mjs watch",
    "dev:watch": "node scripts/dev-runner.mjs watch",
    "dev:once": "node scripts/dev-runner.mjs dev",
    "dev:server": "pnpm --filter @paperclipai/server dev",
    "dev:ui": "pnpm --filter @paperclipai/ui dev",
    "build": "pnpm -r build",
    "typecheck": "pnpm -r typecheck",
    "test": "vitest",
    "test:run": "vitest run",
    "db:generate": "pnpm --filter @paperclipai/db generate",
    "db:migrate": "pnpm --filter @paperclipai/db migrate",
    "secrets:migrate-inline-env": "tsx scripts/migrate-inline-env-secrets.ts",
    "db:backup": "./scripts/backup-db.sh",
    "paperclipai": "node cli/node_modules/tsx/dist/cli.mjs cli/src/index.ts",
    "build:npm": "./scripts/build-npm.sh",
    "release:start": "./scripts/release-start.sh",
    "release": "./scripts/release.sh",
    "release:preflight": "./scripts/release-preflight.sh",
    "release:github": "./scripts/create-github-release.sh",
    "release:rollback": "./scripts/rollback-latest.sh",
    "changeset": "changeset",
    "version-packages": "changeset version",
    "check:tokens": "node scripts/check-forbidden-tokens.mjs",
    "docs:dev": "cd docs && npx mintlify dev",
    "smoke:openclaw-join": "./scripts/smoke/openclaw-join.sh",
    "smoke:openclaw-docker-ui": "./scripts/smoke/openclaw-docker-ui.sh",
    "smoke:openclaw-sse-standalone": "./scripts/smoke/openclaw-sse-standalone.sh",
    "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts",
    "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed",
    "evals:smoke": "cd evals/promptfoo && npx promptfoo@latest eval"
  },
  "devDependencies": {
    "@changesets/cli": "^2.30.0",
    "cross-env": "^10.1.0",
    "@playwright/test": "^1.58.2",
    "esbuild": "^0.27.3",
    "typescript": "^5.7.3",
    "vitest": "^3.0.5"
  },
  "engines": {
    "node": ">=20"
  },
  "packageManager": "pnpm@9.15.4"
}