Implements Phase 0 of the agent evals framework plan from discussion #808 and PR #817. Adds the evals/ directory scaffold with promptfoo config and 8 deterministic test cases covering core heartbeat behaviors.

Test cases:
- core.assignment_pickup: picks in_progress before todo
- core.progress_update: posts status comment before exiting
- core.blocked_reporting: sets blocked status with explanation
- governance.approval_required: reviews approval before acting
- governance.company_boundary: refuses cross-company actions
- core.no_work_exit: exits cleanly with no assignments
- core.checkout_before_work: always checks out before modifying
- core.conflict_handling: stops on 409, picks different task

Model matrix: claude-sonnet-4, gpt-4.1, codex-5.4, gemini-2.5-pro via OpenRouter.

Run with `pnpm evals:smoke`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
50 lines
2.0 KiB
JSON
50 lines
2.0 KiB
JSON
{
  "name": "paperclip",
  "private": true,
  "type": "module",
  "scripts": {
    "dev": "node scripts/dev-runner.mjs watch",
    "dev:watch": "node scripts/dev-runner.mjs watch",
    "dev:once": "node scripts/dev-runner.mjs dev",
    "dev:server": "pnpm --filter @paperclipai/server dev",
    "dev:ui": "pnpm --filter @paperclipai/ui dev",
    "build": "pnpm -r build",
    "typecheck": "pnpm -r typecheck",
    "test": "vitest",
    "test:run": "vitest run",
    "db:generate": "pnpm --filter @paperclipai/db generate",
    "db:migrate": "pnpm --filter @paperclipai/db migrate",
    "secrets:migrate-inline-env": "tsx scripts/migrate-inline-env-secrets.ts",
    "db:backup": "./scripts/backup-db.sh",
    "paperclipai": "node cli/node_modules/tsx/dist/cli.mjs cli/src/index.ts",
    "build:npm": "./scripts/build-npm.sh",
    "release:start": "./scripts/release-start.sh",
    "release": "./scripts/release.sh",
    "release:preflight": "./scripts/release-preflight.sh",
    "release:github": "./scripts/create-github-release.sh",
    "release:rollback": "./scripts/rollback-latest.sh",
    "changeset": "changeset",
    "version-packages": "changeset version",
    "check:tokens": "node scripts/check-forbidden-tokens.mjs",
    "docs:dev": "cd docs && npx mintlify dev",
    "smoke:openclaw-join": "./scripts/smoke/openclaw-join.sh",
    "smoke:openclaw-docker-ui": "./scripts/smoke/openclaw-docker-ui.sh",
    "smoke:openclaw-sse-standalone": "./scripts/smoke/openclaw-sse-standalone.sh",
    "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts",
    "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed",
    "evals:smoke": "cd evals/promptfoo && npx promptfoo@latest eval"
  },
  "devDependencies": {
    "@changesets/cli": "^2.30.0",
    "cross-env": "^10.1.0",
    "@playwright/test": "^1.58.2",
    "esbuild": "^0.27.3",
    "typescript": "^5.7.3",
    "vitest": "^3.0.5"
  },
  "engines": {
    "node": ">=20"
  },
  "packageManager": "pnpm@9.15.4"
}