diff --git a/.github/workflows/release-smoke.yml b/.github/workflows/release-smoke.yml
new file mode 100644
index 00000000..823a578c
--- /dev/null
+++ b/.github/workflows/release-smoke.yml
@@ -0,0 +1,124 @@
+name: Release Smoke
+
+on:
+  workflow_dispatch:
+    inputs:
+      paperclip_version:
+        description: Published Paperclip dist-tag to test
+        required: true
+        default: canary
+        type: choice
+        options:
+          - canary
+          - latest
+      host_port:
+        description: Host port for the Docker smoke container
+        required: false
+        default: "3232"
+        type: string
+      artifact_name:
+        description: Artifact name for uploaded diagnostics
+        required: false
+        default: release-smoke
+        type: string
+  workflow_call:
+    inputs:
+      paperclip_version:
+        required: true
+        type: string
+      host_port:
+        required: false
+        default: "3232"
+        type: string
+      artifact_name:
+        required: false
+        default: release-smoke
+        type: string
+
+jobs:
+  smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9.15.4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 24
+          cache: pnpm
+
+      - name: Install dependencies
+        run: pnpm install --no-frozen-lockfile
+
+      - name: Install Playwright browser
+        run: npx playwright install --with-deps chromium
+
+      - name: Launch Docker smoke harness
+        env:
+          # Pass workflow inputs through the environment instead of expanding them
+          # into the script text, so a crafted input value cannot inject shell code.
+          REQUESTED_HOST_PORT: ${{ inputs.host_port }}
+          REQUESTED_PAPERCLIP_VERSION: ${{ inputs.paperclip_version }}
+        run: |
+          metadata_file="$RUNNER_TEMP/release-smoke.env"
+          HOST_PORT="$REQUESTED_HOST_PORT" \
+          DATA_DIR="$RUNNER_TEMP/release-smoke-data" \
+          PAPERCLIPAI_VERSION="$REQUESTED_PAPERCLIP_VERSION" \
+          SMOKE_DETACH=true \
+          SMOKE_METADATA_FILE="$metadata_file" \
+          ./scripts/docker-onboard-smoke.sh
+          # Load the harness metadata, then re-export it for the following steps.
+          set -a
+          source "$metadata_file"
+          set +a
+          {
+            echo "SMOKE_BASE_URL=$SMOKE_BASE_URL"
+            echo "SMOKE_ADMIN_EMAIL=$SMOKE_ADMIN_EMAIL"
+            echo "SMOKE_ADMIN_PASSWORD=$SMOKE_ADMIN_PASSWORD"
+            echo "SMOKE_CONTAINER_NAME=$SMOKE_CONTAINER_NAME"
+            echo "SMOKE_DATA_DIR=$SMOKE_DATA_DIR"
+            echo "SMOKE_IMAGE_NAME=$SMOKE_IMAGE_NAME"
+            echo "SMOKE_PAPERCLIPAI_VERSION=$SMOKE_PAPERCLIPAI_VERSION"
+            echo "SMOKE_METADATA_FILE=$metadata_file"
+          } >> "$GITHUB_ENV"
+
+      - name: Run release smoke Playwright suite
+        env:
+          PAPERCLIP_RELEASE_SMOKE_BASE_URL: ${{ env.SMOKE_BASE_URL }}
+          PAPERCLIP_RELEASE_SMOKE_EMAIL: ${{ env.SMOKE_ADMIN_EMAIL }}
+          PAPERCLIP_RELEASE_SMOKE_PASSWORD: ${{ env.SMOKE_ADMIN_PASSWORD }}
+        run: pnpm run test:release-smoke
+
+      - name: Capture Docker logs
+        if: always()
+        run: |
+          if [[ -n "${SMOKE_CONTAINER_NAME:-}" ]]; then
+            docker logs "$SMOKE_CONTAINER_NAME" >"$RUNNER_TEMP/docker-onboard-smoke.log" 2>&1 || true
+          fi
+
+      - name: Upload diagnostics
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.artifact_name }}
+          path: |
+            ${{ runner.temp }}/docker-onboard-smoke.log
+            ${{ env.SMOKE_METADATA_FILE }}
+            tests/release-smoke/playwright-report/
+            tests/release-smoke/test-results/
+          retention-days: 14
+
+      - name: Stop Docker smoke container
+        if: always()
+        run: |
+          if [[ -n "${SMOKE_CONTAINER_NAME:-}" ]]; then
+            docker rm -f "$SMOKE_CONTAINER_NAME" >/dev/null 2>&1 || true
+          fi
diff --git a/.gitignore b/.gitignore index 312c3969..61b00a22 100644 --- a/.gitignore +++ b/.gitignore @@ -46,5 +46,7 @@ tmp/ # Playwright tests/e2e/test-results/ tests/e2e/playwright-report/ +tests/release-smoke/test-results/ +tests/release-smoke/playwright-report/ .superset/ .claude/worktrees/ diff --git a/doc/DOCKER.md b/doc/DOCKER.md index 6f6ca374..a7055e20 100644 --- a/doc/DOCKER.md +++ b/doc/DOCKER.md @@ -120,6 +120,7 @@ Useful overrides: ```sh HOST_PORT=3200 PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh PAPERCLIP_DEPLOYMENT_MODE=authenticated PAPERCLIP_DEPLOYMENT_EXPOSURE=private ./scripts/docker-onboard-smoke.sh +SMOKE_DETACH=true SMOKE_METADATA_FILE=/tmp/paperclip-smoke.env PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh ``` Notes: @@ -131,4 +132,5 @@ Notes: - Smoke script also defaults `PAPERCLIP_PUBLIC_URL` to 
`http://localhost:<HOST_PORT>` so bootstrap invite URLs and auth callbacks use the reachable host port instead of the container's internal `3100`. - In authenticated mode, the smoke script defaults `SMOKE_AUTO_BOOTSTRAP=true` and drives the real bootstrap path automatically: it signs up a real user, runs `paperclipai auth bootstrap-ceo` inside the container to mint a real bootstrap invite, accepts that invite over HTTP, and verifies board session access. - Run the script in the foreground to watch the onboarding flow; stop with `Ctrl+C` after validation. +- Set `SMOKE_DETACH=true` to leave the container running for automation and optionally write shell-ready metadata to `SMOKE_METADATA_FILE`. - The image definition is in `Dockerfile.onboard-smoke`. diff --git a/doc/RELEASING.md b/doc/RELEASING.md index 401417fd..35d2b50a 100644 --- a/doc/RELEASING.md +++ b/doc/RELEASING.md @@ -69,6 +69,8 @@ Users install canaries with: ```bash npx paperclipai@canary onboard +# or +npx paperclipai@canary onboard --data-dir "$(mktemp -d /tmp/paperclip-canary.XXXXXX)" ``` ### Stable @@ -165,13 +167,22 @@ HOST_PORT=3232 DATA_DIR=./data/release-smoke-canary PAPERCLIPAI_VERSION=canary . 
HOST_PORT=3233 DATA_DIR=./data/release-smoke-stable PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh ``` +Automated browser smoke is also available: + +```bash +gh workflow run release-smoke.yml -f paperclip_version=canary +gh workflow run release-smoke.yml -f paperclip_version=latest +``` + Minimum checks: - `npx paperclipai@canary onboard` installs - onboarding completes without crashes -- the server boots -- the UI loads -- basic company creation and dashboard load work +- authenticated login works with the smoke credentials +- the browser lands in onboarding on a fresh instance +- company creation succeeds +- the first CEO agent is created +- the first CEO heartbeat run is triggered ## Rollback diff --git a/doc/plans/2026-03-17-docker-release-browser-e2e.md b/doc/plans/2026-03-17-docker-release-browser-e2e.md new file mode 100644 index 00000000..e776206a --- /dev/null +++ b/doc/plans/2026-03-17-docker-release-browser-e2e.md @@ -0,0 +1,424 @@ +# Docker Release Browser E2E Plan + +## Context + +Today release smoke testing for published Paperclip packages is manual and shell-driven: + +```sh +HOST_PORT=3232 DATA_DIR=./data/release-smoke-canary PAPERCLIPAI_VERSION=canary ./scripts/docker-onboard-smoke.sh +HOST_PORT=3233 DATA_DIR=./data/release-smoke-stable PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh +``` + +That is useful because it exercises the same public install surface users hit: + +- Docker +- `npx paperclipai@canary` +- `npx paperclipai@latest` +- authenticated bootstrap flow + +But it still leaves the most important release questions to a human with a browser: + +- can I sign in with the smoke credentials? +- do I land in onboarding? +- can I complete onboarding? +- does the initial CEO agent actually get created and run? 
+ +The repo already has two adjacent pieces: + +- `tests/e2e/onboarding.spec.ts` covers the onboarding wizard against the local source tree +- `scripts/docker-onboard-smoke.sh` boots a published Docker install and auto-bootstraps authenticated mode, but only verifies the API/session layer + +What is missing is one deterministic browser test that joins those two paths. + +## Goal + +Add a release-grade Docker-backed browser E2E that validates the published `canary` and `latest` installs end to end: + +1. boot the published package in Docker +2. sign in with known smoke credentials +3. verify the user is routed into onboarding +4. complete onboarding in the browser +5. verify the first CEO agent exists +6. verify the initial CEO run was triggered and reached a terminal or active state + +Then wire that test into GitHub Actions so release validation is no longer manual-only. + +## Recommendation In One Sentence + +Turn the current Docker smoke script into a machine-friendly test harness, add a dedicated Playwright release-smoke spec that drives the authenticated browser flow against published Docker installs, and run it in GitHub Actions for both `canary` and `latest`. + +## What We Have Today + +### Existing local browser coverage + +`tests/e2e/onboarding.spec.ts` already proves the onboarding wizard can: + +- create a company +- create a CEO agent +- create an initial issue +- optionally observe task progress + +That is a good base, but it does not validate the public npm package, Docker path, authenticated login flow, or release dist-tags. 
+ +### Existing Docker smoke coverage + +`scripts/docker-onboard-smoke.sh` already does useful setup work: + +- builds `Dockerfile.onboard-smoke` +- runs `paperclipai@${PAPERCLIPAI_VERSION}` inside Docker +- waits for health +- signs up or signs in a smoke admin user +- generates and accepts the bootstrap CEO invite in authenticated mode +- verifies a board session and `/api/companies` + +That means the hard bootstrap problem is mostly solved already. The main gap is that the script is human-oriented and never hands control to a browser test. + +### Existing CI shape + +The repo already has: + +- `.github/workflows/e2e.yml` for manual Playwright runs against local source +- `.github/workflows/release.yml` for canary publish on `master` and manual stable promotion + +So the right move is to extend the current test/release system, not create a parallel one. + +## Product Decision + +### 1. The release smoke should stay deterministic and token-free + +The first version should not require OpenAI, Anthropic, or external agent credentials. + +Use the onboarding flow with a deterministic adapter that can run on a stock GitHub runner and inside the published Docker install. The existing `process` adapter with a trivial command is the right base path for this release gate. + +That keeps this test focused on: + +- release packaging +- auth/bootstrap +- UI routing +- onboarding contract +- agent creation +- heartbeat invocation plumbing + +Later we can add a second credentialed smoke lane for real model-backed agents. + +### 2. Smoke credentials become an explicit test contract + +The current defaults in `scripts/docker-onboard-smoke.sh` should be treated as stable test fixtures: + +- email: `smoke-admin@paperclip.local` +- password: `paperclip-smoke-password` + +The browser test should log in with those exact values unless overridden by env vars. + +### 3. 
Published-package smoke and source-tree E2E stay separate + +Keep two lanes: + +- source-tree E2E for feature development +- published Docker release smoke for release confidence + +They overlap on onboarding assertions, but they guard different failure classes. + +## Proposed Design + +## 1. Add a CI-friendly Docker smoke harness + +Refactor `scripts/docker-onboard-smoke.sh` so it can run in two modes: + +- interactive mode + - current behavior + - streams logs and waits in foreground for manual inspection +- CI mode + - starts the container + - waits for health and authenticated bootstrap + - prints machine-readable metadata + - exits while leaving the container running for Playwright + +Recommended shape: + +- keep `scripts/docker-onboard-smoke.sh` as the public entry point +- add a `SMOKE_DETACH=true` or `--detach` mode +- emit a JSON blob or `.env` file containing: + - `SMOKE_BASE_URL` + - `SMOKE_ADMIN_EMAIL` + - `SMOKE_ADMIN_PASSWORD` + - `SMOKE_CONTAINER_NAME` + - `SMOKE_DATA_DIR` + +The workflow and Playwright tests can then consume the emitted metadata instead of scraping logs. + +### Why this matters + +The current script always tails logs and then blocks on `wait "$LOG_PID"`. That is convenient for manual smoke testing, but it is the wrong shape for CI orchestration. + +## 2. Add a dedicated Playwright release-smoke spec + +Create a second Playwright entry point specifically for published Docker installs, for example: + +- `tests/release-smoke/playwright.config.ts` +- `tests/release-smoke/docker-auth-onboarding.spec.ts` + +This suite should not use Playwright `webServer`, because the app server will already be running inside Docker. + +### Browser scenario + +The first release-smoke scenario should validate: + +1. open `/` +2. unauthenticated user is redirected to `/auth` +3. sign in using the smoke credentials +4. authenticated user lands on onboarding when no companies exist +5. onboarding wizard appears with the expected step labels +6. 
create a company +7. create the first agent using `process` +8. create the initial issue +9. finish onboarding and open the created issue +10. verify via API: + - company exists + - CEO agent exists + - issue exists and is assigned to the CEO +11. verify the first heartbeat run was triggered: + - either by checking issue status changed from initial state, or + - by checking agent/runs API shows a run for the CEO, or + - both + +The test should tolerate the run completing quickly. For this reason, the assertion should accept: + +- `queued` +- `running` +- `succeeded` + +and similarly for issue progression if the issue status changes before the assertion runs. + +### Why a separate spec instead of reusing `tests/e2e/onboarding.spec.ts` + +The local-source test and release-smoke test have different assumptions: + +- different server lifecycle +- different auth path +- different deployment mode +- published npm package instead of local workspace code + +Trying to force both through one spec will make both worse. + +## 3. Add a release-smoke workflow in GitHub Actions + +Add a workflow dedicated to this surface, ideally reusable: + +- `.github/workflows/release-smoke.yml` + +Recommended triggers: + +- `workflow_dispatch` +- `workflow_call` + +Recommended inputs: + +- `paperclip_version` + - `canary` or `latest` +- `host_port` + - optional, default runner-safe port +- `artifact_name` + - optional for clearer uploads + +### Job outline + +1. checkout repo +2. install Node/pnpm +3. install Playwright browser dependencies +4. launch Docker smoke harness in detached mode with the chosen dist-tag +5. run the release-smoke Playwright suite against the returned base URL +6. always collect diagnostics: + - Playwright report + - screenshots + - trace + - `docker logs` + - harness metadata file +7. 
stop and remove container + +### Why a reusable workflow + +This lets us: + +- run the smoke manually on demand +- call it from `release.yml` +- reuse the same job for both `canary` and `latest` + +## 4. Integrate it into release automation incrementally + +### Phase A: Manual workflow only + +First ship the workflow as manual-only so the harness and test can be stabilized without blocking releases. + +### Phase B: Run automatically after canary publish + +After `publish_canary` succeeds in `.github/workflows/release.yml`, call the reusable release-smoke workflow with: + +- `paperclip_version=canary` + +This proves the just-published public canary really boots and onboards. + +### Phase C: Run automatically after stable publish + +After `publish_stable` succeeds, call the same workflow with: + +- `paperclip_version=latest` + +This gives us post-publish confirmation that the stable dist-tag is healthy. + +### Important nuance + +Testing `latest` from npm cannot happen before stable publish, because the package under test does not exist under `latest` yet. So the `latest` smoke is a post-publish verification, not a pre-publish gate. + +If we later want a true pre-publish stable gate, that should be a separate source-ref or locally built package smoke job. + +## 5. Make diagnostics first-class + +This workflow is only valuable if failures are fast to debug. + +Always capture: + +- Playwright HTML report +- Playwright trace on failure +- final screenshot on failure +- full `docker logs` output +- emitted smoke metadata +- optional `curl /api/health` snapshot + +Without that, the test will become a flaky black box and people will stop trusting it. + +## Implementation Plan + +## Phase 1: Harness refactor + +Files: + +- `scripts/docker-onboard-smoke.sh` +- optionally `scripts/lib/docker-onboard-smoke.sh` or similar helper +- `doc/DOCKER.md` +- `doc/RELEASING.md` + +Tasks: + +1. Add detached/CI mode to the Docker smoke script. +2. 
Make the script emit machine-readable connection metadata. +3. Keep the current interactive manual mode intact. +4. Add reliable cleanup commands for CI. + +Acceptance: + +- a script invocation can start the published Docker app, auto-bootstrap it, and return control to the caller with enough metadata for browser automation + +## Phase 2: Browser release-smoke suite + +Files: + +- `tests/release-smoke/playwright.config.ts` +- `tests/release-smoke/docker-auth-onboarding.spec.ts` +- root `package.json` + +Tasks: + +1. Add a dedicated Playwright config for external server testing. +2. Implement login + onboarding + CEO creation flow. +3. Assert a CEO run was created or completed. +4. Add a root script such as: + - `test:release-smoke` + +Acceptance: + +- the suite passes locally against both: + - `PAPERCLIPAI_VERSION=canary` + - `PAPERCLIPAI_VERSION=latest` + +## Phase 3: GitHub Actions workflow + +Files: + +- `.github/workflows/release-smoke.yml` + +Tasks: + +1. Add manual and reusable workflow entry points. +2. Install Chromium and runner dependencies. +3. Start Docker smoke in detached mode. +4. Run the release-smoke Playwright suite. +5. Upload diagnostics artifacts. + +Acceptance: + +- a maintainer can run the workflow manually for either `canary` or `latest` + +## Phase 4: Release workflow integration + +Files: + +- `.github/workflows/release.yml` +- `doc/RELEASING.md` + +Tasks: + +1. Trigger release smoke automatically after canary publish. +2. Trigger release smoke automatically after stable publish. +3. Document expected behavior and failure handling. + +Acceptance: + +- canary releases automatically produce a published-package browser smoke result +- stable releases automatically produce a `latest` browser smoke result + +## Phase 5: Future extension for real model-backed agent validation + +Not part of the first implementation, but this should be the next layer after the deterministic lane is stable. 
+ +Possible additions: + +- a second Playwright project gated on repo secrets +- real `claude_local` or `codex_local` adapter validation in Docker-capable environments +- assertion that the CEO posts a real task/comment artifact +- stable release holdback until the credentialed lane passes + +This should stay optional until the token-free lane is trustworthy. + +## Acceptance Criteria + +The plan is complete when the implemented system can demonstrate all of the following: + +1. A published `paperclipai@canary` Docker install can be smoke-tested by Playwright in CI. +2. A published `paperclipai@latest` Docker install can be smoke-tested by Playwright in CI. +3. The test logs into authenticated mode with the smoke credentials. +4. The test sees onboarding for a fresh instance. +5. The test completes onboarding in the browser. +6. The test verifies the initial CEO agent was created. +7. The test verifies at least one CEO heartbeat run was triggered. +8. Failures produce actionable artifacts rather than just a red job. + +## Risks And Decisions To Make + +### 1. Fast process runs may finish before the UI visibly updates + +That is expected. The assertions should prefer API polling for run existence/status rather than only visual indicators. + +### 2. `latest` smoke is post-publish, not preventive + +This is a real limitation of testing the published dist-tag itself. It is still valuable, but it should not be confused with a pre-publish gate. + +### 3. We should not overcouple the test to cosmetic onboarding text + +The important contract is flow success, created entities, and run creation. Use visible labels sparingly and prefer stable semantic selectors where possible. + +### 4. Keep the smoke adapter path boring + +For release safety, the first test should use the most boring runnable adapter possible. This is not the place to validate every adapter. + +## Recommended First Slice + +If we want the fastest path to value, ship this in order: + +1. 
add detached mode to `scripts/docker-onboard-smoke.sh` +2. add one Playwright spec for authenticated login + onboarding + CEO run verification +3. add manual `release-smoke.yml` +4. once stable, wire canary into `release.yml` +5. after that, wire stable `latest` smoke into `release.yml` + +That gives release confidence quickly without turning the first version into a large CI redesign. diff --git a/package.json b/package.json index 83de361a..71853b89 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,9 @@ "smoke:openclaw-docker-ui": "./scripts/smoke/openclaw-docker-ui.sh", "smoke:openclaw-sse-standalone": "./scripts/smoke/openclaw-sse-standalone.sh", "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts", - "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed" + "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed", + "test:release-smoke": "npx playwright test --config tests/release-smoke/playwright.config.ts", + "test:release-smoke:headed": "npx playwright test --config tests/release-smoke/playwright.config.ts --headed" }, "devDependencies": { "cross-env": "^10.1.0", diff --git a/scripts/docker-onboard-smoke.sh b/scripts/docker-onboard-smoke.sh index 41c875be..97f6743f 100755 --- a/scripts/docker-onboard-smoke.sh +++ b/scripts/docker-onboard-smoke.sh @@ -7,6 +7,8 @@ HOST_PORT="${HOST_PORT:-3131}" PAPERCLIPAI_VERSION="${PAPERCLIPAI_VERSION:-latest}" DATA_DIR="${DATA_DIR:-$REPO_ROOT/data/docker-onboard-smoke}" HOST_UID="${HOST_UID:-$(id -u)}" +SMOKE_DETACH="${SMOKE_DETACH:-false}" +SMOKE_METADATA_FILE="${SMOKE_METADATA_FILE:-}" PAPERCLIP_DEPLOYMENT_MODE="${PAPERCLIP_DEPLOYMENT_MODE:-authenticated}" PAPERCLIP_DEPLOYMENT_EXPOSURE="${PAPERCLIP_DEPLOYMENT_EXPOSURE:-private}" PAPERCLIP_PUBLIC_URL="${PAPERCLIP_PUBLIC_URL:-http://localhost:${HOST_PORT}}" @@ -18,6 +20,7 @@ CONTAINER_NAME="${IMAGE_NAME//[^a-zA-Z0-9_.-]/-}" LOG_PID="" COOKIE_JAR="" TMP_DIR="" 
+PRESERVE_CONTAINER_ON_EXIT="false" mkdir -p "$DATA_DIR" @@ -25,7 +28,9 @@ cleanup() { if [[ -n "$LOG_PID" ]]; then kill "$LOG_PID" >/dev/null 2>&1 || true fi - docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true + if [[ "$PRESERVE_CONTAINER_ON_EXIT" != "true" ]]; then + docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true + fi if [[ -n "$TMP_DIR" && -d "$TMP_DIR" ]]; then rm -rf "$TMP_DIR" fi @@ -33,6 +38,12 @@ cleanup() { trap cleanup EXIT INT TERM +container_is_running() { + local running + running="$(docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null || true)" + [[ "$running" == "true" ]] +} + wait_for_http() { local url="$1" local attempts="${2:-60}" @@ -42,11 +53,36 @@ wait_for_http() { if curl -fsS "$url" >/dev/null 2>&1; then return 0 fi + if ! container_is_running; then + echo "Smoke bootstrap failed: container $CONTAINER_NAME exited before $url became ready" >&2 + docker logs "$CONTAINER_NAME" >&2 || true + return 1 + fi sleep "$sleep_seconds" done + if ! container_is_running; then + echo "Smoke bootstrap failed: container $CONTAINER_NAME exited before readiness check completed" >&2 + docker logs "$CONTAINER_NAME" >&2 || true + fi return 1 } +write_metadata_file() { + if [[ -z "$SMOKE_METADATA_FILE" ]]; then + return 0 + fi + mkdir -p "$(dirname "$SMOKE_METADATA_FILE")" + { + printf 'SMOKE_BASE_URL=%q\n' "$PAPERCLIP_PUBLIC_URL" + printf 'SMOKE_ADMIN_EMAIL=%q\n' "$SMOKE_ADMIN_EMAIL" + printf 'SMOKE_ADMIN_PASSWORD=%q\n' "$SMOKE_ADMIN_PASSWORD" + printf 'SMOKE_CONTAINER_NAME=%q\n' "$CONTAINER_NAME" + printf 'SMOKE_DATA_DIR=%q\n' "$DATA_DIR" + printf 'SMOKE_IMAGE_NAME=%q\n' "$IMAGE_NAME" + printf 'SMOKE_PAPERCLIPAI_VERSION=%q\n' "$PAPERCLIPAI_VERSION" + } >"$SMOKE_METADATA_FILE" +} + generate_bootstrap_invite_url() { local bootstrap_output local bootstrap_status @@ -214,9 +250,12 @@ echo "==> Running onboard smoke container" echo " UI should be reachable at: http://localhost:$HOST_PORT" echo " Public URL: $PAPERCLIP_PUBLIC_URL" echo " 
Smoke auto-bootstrap: $SMOKE_AUTO_BOOTSTRAP" +echo " Detached mode: $SMOKE_DETACH" echo " Data dir: $DATA_DIR" echo " Deployment: $PAPERCLIP_DEPLOYMENT_MODE/$PAPERCLIP_DEPLOYMENT_EXPOSURE" -echo " Live output: onboard banner and server logs stream in this terminal (Ctrl+C to stop)" +if [[ "$SMOKE_DETACH" != "true" ]]; then + echo " Live output: onboard banner and server logs stream in this terminal (Ctrl+C to stop)" +fi docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true @@ -231,8 +270,10 @@ docker run -d --rm \ -v "$DATA_DIR:/paperclip" \ "$IMAGE_NAME" >/dev/null -docker logs -f "$CONTAINER_NAME" & -LOG_PID=$! +if [[ "$SMOKE_DETACH" != "true" ]]; then + docker logs -f "$CONTAINER_NAME" & + LOG_PID=$! +fi TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/paperclip-onboard-smoke.XXXXXX")" COOKIE_JAR="$TMP_DIR/cookies.txt" @@ -246,4 +287,17 @@ if [[ "$SMOKE_AUTO_BOOTSTRAP" == "true" && "$PAPERCLIP_DEPLOYMENT_MODE" == "auth auto_bootstrap_authenticated_smoke fi +write_metadata_file + +if [[ "$SMOKE_DETACH" == "true" ]]; then + PRESERVE_CONTAINER_ON_EXIT="true" + echo "==> Smoke container ready for automation" + echo " Smoke base URL: $PAPERCLIP_PUBLIC_URL" + echo " Smoke admin credentials: $SMOKE_ADMIN_EMAIL / $SMOKE_ADMIN_PASSWORD" + if [[ -n "$SMOKE_METADATA_FILE" ]]; then + echo " Smoke metadata file: $SMOKE_METADATA_FILE" + fi + exit 0 +fi + wait "$LOG_PID"
diff --git a/tests/release-smoke/docker-auth-onboarding.spec.ts b/tests/release-smoke/docker-auth-onboarding.spec.ts
new file mode 100644
index 00000000..068c4234
--- /dev/null
+++ b/tests/release-smoke/docker-auth-onboarding.spec.ts
@@ -0,0 +1,168 @@
+import { expect, test, type Page } from "@playwright/test";
+
+/**
+ * Release smoke for a published Paperclip install running in authenticated mode.
+ *
+ * Signs in with the smoke admin, completes onboarding in the browser, then
+ * confirms through the API that the company, CEO agent, first issue, and an
+ * assignment-triggered heartbeat run exist.
+ */
+
+const ADMIN_EMAIL =
+  process.env.PAPERCLIP_RELEASE_SMOKE_EMAIL ??
+  process.env.SMOKE_ADMIN_EMAIL ??
+  "smoke-admin@paperclip.local";
+const ADMIN_PASSWORD =
+  process.env.PAPERCLIP_RELEASE_SMOKE_PASSWORD ??
+  process.env.SMOKE_ADMIN_PASSWORD ??
+  "paperclip-smoke-password";
+
+const COMPANY_NAME = `Release-Smoke-${Date.now()}`;
+const AGENT_NAME = "CEO";
+const TASK_TITLE = "Release smoke task";
+
+type CompanySummary = { id: string; name: string };
+type AgentSummary = { id: string; name: string; role: string; adapterType: string };
+type IssueSummary = { id: string; title: string; assigneeAgentId: string | null };
+type HeartbeatRunSummary = { agentId: string; invocationSource: string; status: string };
+
+/** Returns `value` narrowed to `T`, or fails the test naming the missing `label`. */
+function requireFound<T>(value: T | undefined, label: string): T {
+  if (value === undefined) {
+    throw new Error(`Release smoke: ${label} not found in API response`);
+  }
+  return value;
+}
+
+/** Opens the app, expects the redirect to `/auth`, and signs in as the smoke admin. */
+async function signIn(page: Page): Promise<void> {
+  await page.goto("/");
+  await expect(page).toHaveURL(/\/auth/);
+
+  await page.locator('input[type="email"]').fill(ADMIN_EMAIL);
+  await page.locator('input[type="password"]').fill(ADMIN_PASSWORD);
+  await page.getByRole("button", { name: "Sign In" }).click();
+
+  await expect(page).not.toHaveURL(/\/auth/, { timeout: 20_000 });
+}
+
+/** Waits for the onboarding wizard, clicking "Start Onboarding" first if it is shown. */
+async function openOnboarding(page: Page): Promise<void> {
+  const wizardHeading = page.locator("h3", { hasText: "Name your company" });
+  const startButton = page.getByRole("button", { name: "Start Onboarding" });
+
+  await expect(wizardHeading.or(startButton)).toBeVisible({ timeout: 20_000 });
+
+  if (await startButton.isVisible()) {
+    await startButton.click();
+  }
+
+  await expect(wizardHeading).toBeVisible({ timeout: 10_000 });
+}
+
+test.describe("Docker authenticated onboarding smoke", () => {
+  test("logs in, completes onboarding, and triggers the first CEO run", async ({
+    page,
+  }) => {
+    await signIn(page);
+    await openOnboarding(page);
+
+    await page.locator('input[placeholder="Acme Corp"]').fill(COMPANY_NAME);
+    await page.getByRole("button", { name: "Next" }).click();
+
+    await expect(
+      page.locator("h3", { hasText: "Create your first agent" })
+    ).toBeVisible({ timeout: 10_000 });
+
+    await expect(page.locator('input[placeholder="CEO"]')).toHaveValue(AGENT_NAME);
+    await page.getByRole("button", { name: "Process" }).click();
+    await page.locator('input[placeholder="e.g. node, python"]').fill("echo");
+    await page
+      .locator('input[placeholder="e.g. script.js, --flag"]')
+      .fill("release smoke");
+    await page.getByRole("button", { name: "Next" }).click();
+
+    await expect(
+      page.locator("h3", { hasText: "Give it something to do" })
+    ).toBeVisible({ timeout: 10_000 });
+    await page
+      .locator('input[placeholder="e.g. Research competitor pricing"]')
+      .fill(TASK_TITLE);
+    await page.getByRole("button", { name: "Next" }).click();
+
+    await expect(
+      page.locator("h3", { hasText: "Ready to launch" })
+    ).toBeVisible({ timeout: 10_000 });
+    await expect(page.getByText(COMPANY_NAME)).toBeVisible();
+    await expect(page.getByText(AGENT_NAME)).toBeVisible();
+    await expect(page.getByText(TASK_TITLE)).toBeVisible();
+
+    await page.getByRole("button", { name: "Create & Open Issue" }).click();
+    await expect(page).toHaveURL(/\/issues\//, { timeout: 10_000 });
+
+    const baseUrl = new URL(page.url()).origin;
+
+    const companiesRes = await page.request.get(`${baseUrl}/api/companies`);
+    expect(companiesRes.ok()).toBe(true);
+    const companies = (await companiesRes.json()) as CompanySummary[];
+    const company = requireFound(
+      companies.find((entry) => entry.name === COMPANY_NAME),
+      `company "${COMPANY_NAME}"`
+    );
+
+    const agentsRes = await page.request.get(
+      `${baseUrl}/api/companies/${company.id}/agents`
+    );
+    expect(agentsRes.ok()).toBe(true);
+    const agents = (await agentsRes.json()) as AgentSummary[];
+    const ceoAgent = requireFound(
+      agents.find((entry) => entry.name === AGENT_NAME),
+      `agent "${AGENT_NAME}"`
+    );
+    expect(ceoAgent.role).toBe("ceo");
+    expect(ceoAgent.adapterType).toBe("process");
+
+    const issuesRes = await page.request.get(
+      `${baseUrl}/api/companies/${company.id}/issues`
+    );
+    expect(issuesRes.ok()).toBe(true);
+    const issues = (await issuesRes.json()) as IssueSummary[];
+    const issue = requireFound(
+      issues.find((entry) => entry.title === TASK_TITLE),
+      `issue "${TASK_TITLE}"`
+    );
+    expect(issue.assigneeAgentId).toBe(ceoAgent.id);
+
+    // Poll the API rather than the UI: the `echo` process run can finish before
+    // the page visibly updates.
+    await expect
+      .poll(
+        async () => {
+          const runsRes = await page.request.get(
+            `${baseUrl}/api/companies/${company.id}/heartbeat-runs?agentId=${ceoAgent.id}`
+          );
+          expect(runsRes.ok()).toBe(true);
+          const runs = (await runsRes.json()) as HeartbeatRunSummary[];
+          const ceoRuns = runs.filter((entry) => entry.agentId === ceoAgent.id);
+          // The response order is not pinned down here, so look for the
+          // assignment-triggered run explicitly instead of trusting the first entry.
+          const run =
+            ceoRuns.find((entry) => entry.invocationSource === "assignment") ??
+            ceoRuns[0];
+          return run
+            ? { invocationSource: run.invocationSource, status: run.status }
+            : null;
+        },
+        {
+          timeout: 30_000,
+          intervals: [1_000, 2_000, 5_000],
+        }
+      )
+      .toEqual(
+        expect.objectContaining({
+          invocationSource: "assignment",
+          status: expect.stringMatching(/^(queued|running|succeeded)$/),
+        })
+      );
+  });
+});
diff --git a/tests/release-smoke/playwright.config.ts b/tests/release-smoke/playwright.config.ts new file mode 100644 index 00000000..76e278f9 --- /dev/null +++ b/tests/release-smoke/playwright.config.ts @@ -0,0 +1,28 @@ +import { defineConfig } from "@playwright/test"; + +const BASE_URL = + process.env.PAPERCLIP_RELEASE_SMOKE_BASE_URL ?? "http://127.0.0.1:3232"; + +export default defineConfig({ + testDir: ".", + testMatch: "**/*.spec.ts", + timeout: 90_000, + expect: { + timeout: 15_000, + }, + retries: process.env.CI ? 1 : 0, + use: { + baseURL: BASE_URL, + headless: true, + screenshot: "only-on-failure", + trace: "retain-on-failure", + }, + projects: [ + { + name: "chromium", + use: { browserName: "chromium" }, + }, + ], + outputDir: "./test-results", + reporter: [["list"], ["html", { open: "never", outputFolder: "./playwright-report" }]], +});