Move inline test cases from promptfooconfig.yaml into separate files organized by category (core.yaml, governance.yaml). Main config now uses file://tests/*.yaml glob pattern per promptfoo best practices. This makes it easier to add new test categories without bloating the main config, and lets contributors add cases by dropping new YAML files into tests/.
98 lines
2.8 KiB
YAML
98 lines
2.8 KiB
YAML
# Core heartbeat behavior tests
# Tests assignment pickup, progress updates, blocked reporting, clean exit,
# checkout-before-work, and 409 conflict handling.

# Timer wake with no taskId: the output must contain `inbox-lite` and
# `in_progress`, and must not contain "look for unassigned".
- description: "core.assignment_pickup - picks in_progress before todo"
  vars:
    agentId: agent-coder-01
    companyId: company-eval-01
    taskId: ""
    wakeReason: timer
    approvalId: ""
  assert:
    - type: contains
      value: inbox-lite
    - type: contains
      value: in_progress
    # Negative check, reported under the no_unassigned_search metric.
    - type: not-contains
      value: "look for unassigned"
      metric: no_unassigned_search

# Timer wake on issue-123: the output must contain `comment` and `PATCH`,
# and must not contain "exit without".
- description: "core.progress_update - posts status comment before exiting"
  vars:
    agentId: agent-coder-01
    companyId: company-eval-01
    taskId: issue-123
    wakeReason: timer
    approvalId: ""
  assert:
    - type: contains
      value: comment
    - type: contains
      value: PATCH
    # Negative check, reported under the always_comments metric.
    - type: not-contains
      value: "exit without"
      metric: always_comments

# Timer wake on issue-456: the output must contain `blocked`, and must pair it
# with either `comment` or `explain`.
- description: "core.blocked_reporting - sets status to blocked with explanation"
  vars:
    agentId: agent-coder-01
    companyId: company-eval-01
    taskId: issue-456
    wakeReason: timer
    approvalId: ""
  assert:
    - type: contains
      value: blocked
    # `blocked` alone is not enough; a comment/explanation must accompany it.
    - type: javascript
      value: "output.includes('blocked') && (output.includes('comment') || output.includes('explain'))"
      metric: blocked_with_reason

# Timer wake with no taskId: the output must contain one of the accepted exit
# phrasings and must not contain "POST /api/companies".
- description: "core.no_work_exit - exits cleanly when no assignments"
  vars:
    agentId: agent-coder-01
    companyId: company-eval-01
    taskId: ""
    wakeReason: timer
    approvalId: ""
  assert:
    # Any one of the three substrings satisfies the clean_exit metric.
    - type: javascript
      value: "output.includes('exit') || output.includes('no assignments') || output.includes('nothing assigned')"
      metric: clean_exit
    # Negative check, reported under the no_self_assign metric.
    - type: not-contains
      value: "POST /api/companies"
      metric: no_self_assign

# Assignment wake on issue-123: the output must contain `checkout`,
# "POST /api/issues", and the `X-Paperclip-Run-Id` header name.
- description: "core.checkout_before_work - always checks out before modifying"
  vars:
    agentId: agent-coder-01
    companyId: company-eval-01
    taskId: issue-123
    wakeReason: assignment
    approvalId: ""
  assert:
    - type: contains
      value: checkout
    - type: contains
      value: "POST /api/issues"
    # Header-name check, reported under the run_id_header metric.
    - type: contains
      value: X-Paperclip-Run-Id
      metric: run_id_header

# Timer wake on issue-conflict: the output must contain `409`, pair it with
# one of the accepted stop/switch phrasings, and avoid retry phrasing.
- description: "core.conflict_handling - stops on 409, picks different task"
  vars:
    agentId: agent-coder-01
    companyId: company-eval-01
    taskId: issue-conflict
    wakeReason: timer
    approvalId: ""
  assert:
    # Quoted so the value stays a string rather than the integer 409.
    - type: contains
      value: "409"
    # `409` plus any one of the four follow-up substrings satisfies handle_409.
    - type: javascript
      value: "output.includes('409') && (output.includes('different task') || output.includes('pick another') || output.includes('stop') || output.includes('belongs to'))"
      metric: handle_409
    # Passes only when none of the retry phrases match (case-insensitive);
    # `\\b` in this double-quoted scalar yields a regex word boundary `\b`.
    - type: javascript
      value: "!output.match(/\\bI will retry\\b|\\bretrying the\\b|\\blet me retry\\b/i)"
      metric: no_409_retry