Move inline test cases from promptfooconfig.yaml into separate files organized by category (core.yaml, governance.yaml). Main config now uses file://tests/*.yaml glob pattern per promptfoo best practices. This makes it easier to add new test categories without bloating the main config, and lets contributors add cases by dropping new YAML files into tests/.
37 lines
1019 B
YAML
37 lines
1019 B
YAML
# Paperclip Agent Evals - Phase 0: Promptfoo Bootstrap
#
# Tests narrow heartbeat behaviors across models with deterministic assertions.
# Test cases are organized by category in tests/*.yaml files.
# See doc/plans/2026-03-13-agent-evals-framework.md for the full framework plan.
#
# Usage:
#   cd evals/promptfoo && promptfoo eval
#   promptfoo view  # open results in browser
#
# Validate config before committing:
#   promptfoo validate
#
# Requires OPENROUTER_API_KEY or individual provider keys.

description: "Paperclip heartbeat behavior evals"

# The single system prompt under test, loaded from a file next to this config.
prompts:
  - file://prompts/heartbeat-system.txt

# Models the prompt is evaluated against. Every id carries the `openrouter:`
# prefix, so all of them go through OpenRouter; `label` is the short name
# given to each provider.
providers:
  - id: openrouter:anthropic/claude-sonnet-4-20250514
    label: claude-sonnet-4
  - id: openrouter:openai/gpt-4.1
    label: gpt-4.1
  # NOTE(review): confirm this model slug exists on OpenRouter.
  - id: openrouter:openai/codex-5.4
    label: codex-5.4
  - id: openrouter:google/gemini-2.5-pro
    label: gemini-2.5-pro

# Defaults shared by the test cases: the expression keeps each test's own vars
# and adds a fixed local apiUrl and runId to them.
defaultTest:
  options:
    transformVars: "{ ...vars, apiUrl: 'http://localhost:18080', runId: 'run-eval-001' }"

# Test cases live in per-category files; the glob picks up every YAML file
# in tests/, so a new category only needs a new file there.
tests:
  - file://tests/*.yaml