feat: add release smoke workflow

2026-03-18 07:59:32 -05:00
parent 3e0e15394a
commit 19f4a78f4a
9 changed files with 795 additions and 8 deletions
--- a/.github/workflows/release-smoke.yml
+++ b/.github/workflows/release-smoke.yml
@@ -0,0 +1,118 @@
 # Release smoke workflow: runs the release-smoke job against a published
 # Paperclip dist-tag. It can be started manually or invoked from another
 # workflow; both entry points take the same three inputs.
 name: Release Smoke
 on:
  # Manual entry point. The dist-tag is limited to the listed choices and
  # defaults to `canary`.
  workflow_dispatch:
    inputs:
      paperclip_version:
        description: Published Paperclip dist-tag to test
        required: true
        default: canary
        type: choice
        options:
          - canary
          - latest
      host_port:
        description: Host port for the Docker smoke container
        required: false
        default: "3232"
        type: string
      artifact_name:
        description: Artifact name for uploaded diagnostics
        required: false
        default: release-smoke
        type: string
  # Reusable entry point. Here `paperclip_version` is a free-form string with
  # no default, so the caller decides which dist-tag is tested.
  workflow_call:
    inputs:
      paperclip_version:
        required: true
        type: string
      host_port:
        required: false
        default: "3232"
        type: string
      artifact_name:
        required: false
        default: release-smoke
        type: string
 jobs:
  # Single job: prepare the runner, start the Docker harness, run Playwright,
  # then collect diagnostics and remove the container.
  smoke:
    runs-on: ubuntu-latest
    # Upper bound for the whole job, including install and browser download.
    timeout-minutes: 45
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # pnpm is set up before Node so that setup-node's `cache: pnpm` can use it.
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 9.15.4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 24
          cache: pnpm
      # NOTE(review): --no-frozen-lockfile allows pnpm to change the lockfile
      # during install; confirm this is intended rather than --frozen-lockfile.
      - name: Install dependencies
        run: pnpm install --no-frozen-lockfile
      # Only Chromium is installed, together with its OS-level dependencies.
      - name: Install Playwright browser
        run: npx playwright install --with-deps chromium
      - name: Launch Docker smoke harness
        # Workflow inputs are handed to the script through environment
        # variables instead of being expanded inside the script text.
        # `${{ ... }}` expressions are substituted before bash parses the
        # script, so a value containing quotes or `$(...)` would otherwise be
        # executed as shell code; `workflow_call` accepts any string for
        # `paperclip_version`.
        env:
          INPUT_HOST_PORT: ${{ inputs.host_port }}
          INPUT_PAPERCLIP_VERSION: ${{ inputs.paperclip_version }}
        run: |
          metadata_file="$RUNNER_TEMP/release-smoke.env"
          # Start the harness in detached mode and ask it to write its
          # connection metadata to $metadata_file.
          HOST_PORT="$INPUT_HOST_PORT" \
          DATA_DIR="$RUNNER_TEMP/release-smoke-data" \
          PAPERCLIPAI_VERSION="$INPUT_PAPERCLIP_VERSION" \
          SMOKE_DETACH=true \
          SMOKE_METADATA_FILE="$metadata_file" \
          ./scripts/docker-onboard-smoke.sh
          # Load the SMOKE_* values the harness wrote (set -a exports them).
          set -a
          source "$metadata_file"
          set +a
          # Publish the values to later steps through GITHUB_ENV.
          {
            echo "SMOKE_BASE_URL=$SMOKE_BASE_URL"
            echo "SMOKE_ADMIN_EMAIL=$SMOKE_ADMIN_EMAIL"
            echo "SMOKE_ADMIN_PASSWORD=$SMOKE_ADMIN_PASSWORD"
            echo "SMOKE_CONTAINER_NAME=$SMOKE_CONTAINER_NAME"
            echo "SMOKE_DATA_DIR=$SMOKE_DATA_DIR"
            echo "SMOKE_IMAGE_NAME=$SMOKE_IMAGE_NAME"
            echo "SMOKE_PAPERCLIPAI_VERSION=$SMOKE_PAPERCLIPAI_VERSION"
            echo "SMOKE_METADATA_FILE=$metadata_file"
          } >> "$GITHUB_ENV"
      # The SMOKE_* values exported to GITHUB_ENV by the harness step are
      # mapped onto the PAPERCLIP_RELEASE_SMOKE_* variables for the test run.
      - name: Run release smoke Playwright suite
        env:
          PAPERCLIP_RELEASE_SMOKE_BASE_URL: ${{ env.SMOKE_BASE_URL }}
          PAPERCLIP_RELEASE_SMOKE_EMAIL: ${{ env.SMOKE_ADMIN_EMAIL }}
          PAPERCLIP_RELEASE_SMOKE_PASSWORD: ${{ env.SMOKE_ADMIN_PASSWORD }}
        run: pnpm run test:release-smoke
      # The remaining steps use `if: always()` so they also run after a failure.
      # Log capture is skipped when SMOKE_CONTAINER_NAME was never exported, and
      # a failing `docker logs` does not fail the step.
      - name: Capture Docker logs
        if: always()
        run: |
          if [[ -n "${SMOKE_CONTAINER_NAME:-}" ]]; then
            docker logs "$SMOKE_CONTAINER_NAME" >"$RUNNER_TEMP/docker-onboard-smoke.log" 2>&1 || true
          fi
      # Bundles the container log, the harness metadata file, and the
      # Playwright report and test-results directories into one artifact.
      - name: Upload diagnostics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.artifact_name }}
          path: |
            ${{ runner.temp }}/docker-onboard-smoke.log
            ${{ env.SMOKE_METADATA_FILE }}
            tests/release-smoke/playwright-report/
            tests/release-smoke/test-results/
          retention-days: 14
      # Best-effort removal of the container; errors are ignored.
      - name: Stop Docker smoke container
        if: always()
        run: |
          if [[ -n "${SMOKE_CONTAINER_NAME:-}" ]]; then
            docker rm -f "$SMOKE_CONTAINER_NAME" >/dev/null 2>&1 || true
          fi
--- a/.gitignore
+++ b/.gitignore
@@ -46,5 +46,7 @@ tmp/
 # Playwright
 tests/e2e/test-results/
 tests/e2e/playwright-report/
 tests/release-smoke/test-results/
 tests/release-smoke/playwright-report/
 .superset/
 .claude/worktrees/
--- a/doc/DOCKER.md
+++ b/doc/DOCKER.md
@@ -120,6 +120,7 @@ Useful overrides:
 ```sh
 HOST_PORT=3200 PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 PAPERCLIP_DEPLOYMENT_MODE=authenticated PAPERCLIP_DEPLOYMENT_EXPOSURE=private ./scripts/docker-onboard-smoke.sh
 SMOKE_DETACH=true SMOKE_METADATA_FILE=/tmp/paperclip-smoke.env PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 ```
 Notes:
@@ -131,4 +132,5 @@ Notes:
 - Smoke script also defaults `PAPERCLIP_PUBLIC_URL` to `http://localhost:<HOST_PORT>` so bootstrap invite URLs and auth callbacks use the reachable host port instead of the container's internal `3100`.
 - In authenticated mode, the smoke script defaults `SMOKE_AUTO_BOOTSTRAP=true` and drives the real bootstrap path automatically: it signs up a real user, runs `paperclipai auth bootstrap-ceo` inside the container to mint a real bootstrap invite, accepts that invite over HTTP, and verifies board session access.
 - Run the script in the foreground to watch the onboarding flow; stop with `Ctrl+C` after validation.
 - Set `SMOKE_DETACH=true` to leave the container running for automation and optionally write shell-ready metadata to `SMOKE_METADATA_FILE`.
 - The image definition is in `Dockerfile.onboard-smoke`.
--- a/doc/RELEASING.md
+++ b/doc/RELEASING.md
@@ -69,6 +69,8 @@ Users install canaries with:
 ```bash
 npx paperclipai@canary onboard
 # or
 npx paperclipai@canary onboard --data-dir "$(mktemp -d /tmp/paperclip-canary.XXXXXX)"
 ```
 ### Stable
@@ -165,13 +167,22 @@ HOST_PORT=3232 DATA_DIR=./data/release-smoke-canary PAPERCLIPAI_VERSION=canary .
 HOST_PORT=3233 DATA_DIR=./data/release-smoke-stable PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 ```
 Automated browser smoke is also available:
 ```bash
 gh workflow run release-smoke.yml -f paperclip_version=canary
 gh workflow run release-smoke.yml -f paperclip_version=latest
 ```
 Minimum checks:
 - `npx paperclipai@canary onboard` installs
 - onboarding completes without crashes
- the server boots
+- authenticated login works with the smoke credentials
- the UI loads
+- the browser lands in onboarding on a fresh instance
- basic company creation and dashboard load work
+- company creation succeeds
 - the first CEO agent is created
 - the first CEO heartbeat run is triggered
 ## Rollback
--- a/doc/plans/2026-03-17-docker-release-browser-e2e.md
+++ b/doc/plans/2026-03-17-docker-release-browser-e2e.md
@@ -0,0 +1,424 @@
 # Docker Release Browser E2E Plan
 ## Context
 Today release smoke testing for published Paperclip packages is manual and shell-driven:
 ```sh
 HOST_PORT=3232 DATA_DIR=./data/release-smoke-canary PAPERCLIPAI_VERSION=canary ./scripts/docker-onboard-smoke.sh
 HOST_PORT=3233 DATA_DIR=./data/release-smoke-stable PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 ```
 That is useful because it exercises the same public install surface users hit:
 - Docker
 - `npx paperclipai@canary`
 - `npx paperclipai@latest`
 - authenticated bootstrap flow
 But it still leaves the most important release questions to a human with a browser:
 - can I sign in with the smoke credentials?
 - do I land in onboarding?
 - can I complete onboarding?
 - does the initial CEO agent actually get created and run?
 The repo already has two adjacent pieces:
 - `tests/e2e/onboarding.spec.ts` covers the onboarding wizard against the local source tree
 - `scripts/docker-onboard-smoke.sh` boots a published Docker install and auto-bootstraps authenticated mode, but only verifies the API/session layer
 What is missing is one deterministic browser test that joins those two paths.
 ## Goal
 Add a release-grade Docker-backed browser E2E that validates the published `canary` and `latest` installs end to end:
 1. boot the published package in Docker
 2. sign in with known smoke credentials
 3. verify the user is routed into onboarding
 4. complete onboarding in the browser
 5. verify the first CEO agent exists
 6. verify the initial CEO run was triggered and reached a terminal or active state
 Then wire that test into GitHub Actions so release validation is no longer manual-only.
 ## Recommendation In One Sentence
 Turn the current Docker smoke script into a machine-friendly test harness, add a dedicated Playwright release-smoke spec that drives the authenticated browser flow against published Docker installs, and run it in GitHub Actions for both `canary` and `latest`.
 ## What We Have Today
 ### Existing local browser coverage
 `tests/e2e/onboarding.spec.ts` already proves the onboarding wizard can:
 - create a company
 - create a CEO agent
 - create an initial issue
 - optionally observe task progress
 That is a good base, but it does not validate the public npm package, Docker path, authenticated login flow, or release dist-tags.
 ### Existing Docker smoke coverage
 `scripts/docker-onboard-smoke.sh` already does useful setup work:
 - builds `Dockerfile.onboard-smoke`
 - runs `paperclipai@${PAPERCLIPAI_VERSION}` inside Docker
 - waits for health
 - signs up or signs in a smoke admin user
 - generates and accepts the bootstrap CEO invite in authenticated mode
 - verifies a board session and `/api/companies`
 That means the hard bootstrap problem is mostly solved already. The main gap is that the script is human-oriented and never hands control to a browser test.
 ### Existing CI shape
 The repo already has:
 - `.github/workflows/e2e.yml` for manual Playwright runs against local source
 - `.github/workflows/release.yml` for canary publish on `master` and manual stable promotion
 So the right move is to extend the current test/release system, not create a parallel one.
 ## Product Decision
 ### 1. The release smoke should stay deterministic and token-free
 The first version should not require OpenAI, Anthropic, or external agent credentials.
 Use the onboarding flow with a deterministic adapter that can run on a stock GitHub runner and inside the published Docker install. The existing `process` adapter with a trivial command is the right base path for this release gate.
 That keeps this test focused on:
 - release packaging
 - auth/bootstrap
 - UI routing
 - onboarding contract
 - agent creation
 - heartbeat invocation plumbing
 Later we can add a second credentialed smoke lane for real model-backed agents.
 ### 2. Smoke credentials become an explicit test contract
 The current defaults in `scripts/docker-onboard-smoke.sh` should be treated as stable test fixtures:
 - email: `smoke-admin@paperclip.local`
 - password: `paperclip-smoke-password`
 The browser test should log in with those exact values unless overridden by env vars.
 ### 3. Published-package smoke and source-tree E2E stay separate
 Keep two lanes:
 - source-tree E2E for feature development
 - published Docker release smoke for release confidence
 They overlap on onboarding assertions, but they guard different failure classes.
 ## Proposed Design
 ## 1. Add a CI-friendly Docker smoke harness
 Refactor `scripts/docker-onboard-smoke.sh` so it can run in two modes:
 - interactive mode
  - current behavior
  - streams logs and waits in foreground for manual inspection
 - CI mode
  - starts the container
  - waits for health and authenticated bootstrap
  - prints machine-readable metadata
  - exits while leaving the container running for Playwright
 Recommended shape:
 - keep `scripts/docker-onboard-smoke.sh` as the public entry point
 - add a `SMOKE_DETACH=true` or `--detach` mode
 - emit a JSON blob or `.env` file containing:
  - `SMOKE_BASE_URL`
  - `SMOKE_ADMIN_EMAIL`
  - `SMOKE_ADMIN_PASSWORD`
  - `SMOKE_CONTAINER_NAME`
  - `SMOKE_DATA_DIR`
 The workflow and Playwright tests can then consume the emitted metadata instead of scraping logs.
 ### Why this matters
 The current script always tails logs and then blocks on `wait "$LOG_PID"`. That is convenient for manual smoke testing, but it is the wrong shape for CI orchestration.
 ## 2. Add a dedicated Playwright release-smoke spec
 Create a second Playwright entry point specifically for published Docker installs, for example:
 - `tests/release-smoke/playwright.config.ts`
 - `tests/release-smoke/docker-auth-onboarding.spec.ts`
 This suite should not use Playwright `webServer`, because the app server will already be running inside Docker.
 ### Browser scenario
 The first release-smoke scenario should validate:
 1. open `/`
 2. unauthenticated user is redirected to `/auth`
 3. sign in using the smoke credentials
 4. authenticated user lands on onboarding when no companies exist
 5. onboarding wizard appears with the expected step labels
 6. create a company
 7. create the first agent using `process`
 8. create the initial issue
 9. finish onboarding and open the created issue
 10. verify via API:
    - company exists
    - CEO agent exists
    - issue exists and is assigned to the CEO
 11. verify the first heartbeat run was triggered:
    - either by checking issue status changed from initial state, or
    - by checking agent/runs API shows a run for the CEO, or
    - both
 The test should tolerate the run completing quickly. For this reason, the assertion should accept:
 - `queued`
 - `running`
 - `succeeded`
 and similarly for issue progression if the issue status changes before the assertion runs.
 ### Why a separate spec instead of reusing `tests/e2e/onboarding.spec.ts`
 The local-source test and release-smoke test have different assumptions:
 - different server lifecycle
 - different auth path
 - different deployment mode
 - published npm package instead of local workspace code
 Trying to force both through one spec will make both worse.
 ## 3. Add a release-smoke workflow in GitHub Actions
 Add a workflow dedicated to this surface, ideally reusable:
 - `.github/workflows/release-smoke.yml`
 Recommended triggers:
 - `workflow_dispatch`
 - `workflow_call`
 Recommended inputs:
 - `paperclip_version`
  - `canary` or `latest`
 - `host_port`
  - optional, default runner-safe port
 - `artifact_name`
  - optional for clearer uploads
 ### Job outline
 1. checkout repo
 2. install Node/pnpm
 3. install Playwright browser dependencies
 4. launch Docker smoke harness in detached mode with the chosen dist-tag
 5. run the release-smoke Playwright suite against the returned base URL
 6. always collect diagnostics:
   - Playwright report
   - screenshots
   - trace
   - `docker logs`
   - harness metadata file
 7. stop and remove container
 ### Why a reusable workflow
 This lets us:
 - run the smoke manually on demand
 - call it from `release.yml`
 - reuse the same job for both `canary` and `latest`
 ## 4. Integrate it into release automation incrementally
 ### Phase A: Manual workflow only
 First ship the workflow as manual-only so the harness and test can be stabilized without blocking releases.
 ### Phase B: Run automatically after canary publish
 After `publish_canary` succeeds in `.github/workflows/release.yml`, call the reusable release-smoke workflow with:
 - `paperclip_version=canary`
 This proves the just-published public canary really boots and onboards.
 ### Phase C: Run automatically after stable publish
 After `publish_stable` succeeds, call the same workflow with:
 - `paperclip_version=latest`
 This gives us post-publish confirmation that the stable dist-tag is healthy.
 ### Important nuance
 Testing `latest` from npm cannot happen before stable publish, because the package under test does not exist under `latest` yet. So the `latest` smoke is a post-publish verification, not a pre-publish gate.
 If we later want a true pre-publish stable gate, that should be a separate source-ref or locally built package smoke job.
 ## 5. Make diagnostics first-class
 This workflow is only valuable if failures are fast to debug.
 Always capture:
 - Playwright HTML report
 - Playwright trace on failure
 - final screenshot on failure
 - full `docker logs` output
 - emitted smoke metadata
 - optional `curl /api/health` snapshot
 Without that, the test will become a flaky black box and people will stop trusting it.
 ## Implementation Plan
 ## Phase 1: Harness refactor
 Files:
 - `scripts/docker-onboard-smoke.sh`
 - optionally `scripts/lib/docker-onboard-smoke.sh` or similar helper
 - `doc/DOCKER.md`
 - `doc/RELEASING.md`
 Tasks:
 1. Add detached/CI mode to the Docker smoke script.
 2. Make the script emit machine-readable connection metadata.
 3. Keep the current interactive manual mode intact.
 4. Add reliable cleanup commands for CI.
 Acceptance:
 - a script invocation can start the published Docker app, auto-bootstrap it, and return control to the caller with enough metadata for browser automation
 ## Phase 2: Browser release-smoke suite
 Files:
 - `tests/release-smoke/playwright.config.ts`
 - `tests/release-smoke/docker-auth-onboarding.spec.ts`
 - root `package.json`
 Tasks:
 1. Add a dedicated Playwright config for external server testing.
 2. Implement login + onboarding + CEO creation flow.
 3. Assert a CEO run was created or completed.
 4. Add a root script such as:
   - `test:release-smoke`
 Acceptance:
 - the suite passes locally against both:
  - `PAPERCLIPAI_VERSION=canary`
  - `PAPERCLIPAI_VERSION=latest`
 ## Phase 3: GitHub Actions workflow
 Files:
 - `.github/workflows/release-smoke.yml`
 Tasks:
 1. Add manual and reusable workflow entry points.
 2. Install Chromium and runner dependencies.
 3. Start Docker smoke in detached mode.
 4. Run the release-smoke Playwright suite.
 5. Upload diagnostics artifacts.
 Acceptance:
 - a maintainer can run the workflow manually for either `canary` or `latest`
 ## Phase 4: Release workflow integration
 Files:
 - `.github/workflows/release.yml`
 - `doc/RELEASING.md`
 Tasks:
 1. Trigger release smoke automatically after canary publish.
 2. Trigger release smoke automatically after stable publish.
 3. Document expected behavior and failure handling.
 Acceptance:
 - canary releases automatically produce a published-package browser smoke result
 - stable releases automatically produce a `latest` browser smoke result
 ## Phase 5: Future extension for real model-backed agent validation
 Not part of the first implementation, but this should be the next layer after the deterministic lane is stable.
 Possible additions:
 - a second Playwright project gated on repo secrets
 - real `claude_local` or `codex_local` adapter validation in Docker-capable environments
 - assertion that the CEO posts a real task/comment artifact
 - stable release holdback until the credentialed lane passes
 This should stay optional until the token-free lane is trustworthy.
 ## Acceptance Criteria
 The plan is complete when the implemented system can demonstrate all of the following:
 1. A published `paperclipai@canary` Docker install can be smoke-tested by Playwright in CI.
 2. A published `paperclipai@latest` Docker install can be smoke-tested by Playwright in CI.
 3. The test logs into authenticated mode with the smoke credentials.
 4. The test sees onboarding for a fresh instance.
 5. The test completes onboarding in the browser.
 6. The test verifies the initial CEO agent was created.
 7. The test verifies at least one CEO heartbeat run was triggered.
 8. Failures produce actionable artifacts rather than just a red job.
 ## Risks And Decisions To Make
 ### 1. Fast process runs may finish before the UI visibly updates
 That is expected. The assertions should prefer API polling for run existence/status rather than only visual indicators.
 ### 2. `latest` smoke is post-publish, not preventive
 This is a real limitation of testing the published dist-tag itself. It is still valuable, but it should not be confused with a pre-publish gate.
 ### 3. We should not overcouple the test to cosmetic onboarding text
 The important contract is flow success, created entities, and run creation. Use visible labels sparingly and prefer stable semantic selectors where possible.
 ### 4. Keep the smoke adapter path boring
 For release safety, the first test should use the most boring runnable adapter possible. This is not the place to validate every adapter.
 ## Recommended First Slice
 If we want the fastest path to value, ship this in order:
 1. add detached mode to `scripts/docker-onboard-smoke.sh`
 2. add one Playwright spec for authenticated login + onboarding + CEO run verification
 3. add manual `release-smoke.yml`
 4. once stable, wire canary into `release.yml`
 5. after that, wire stable `latest` smoke into `release.yml`
 That gives release confidence quickly without turning the first version into a large CI redesign.
--- a/package.json
+++ b/package.json
@@ -29,7 +29,9 @@
    "smoke:openclaw-docker-ui": "./scripts/smoke/openclaw-docker-ui.sh",
    "smoke:openclaw-sse-standalone": "./scripts/smoke/openclaw-sse-standalone.sh",
    "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts",
-    "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed"
+    "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed",
    "test:release-smoke": "npx playwright test --config tests/release-smoke/playwright.config.ts",
    "test:release-smoke:headed": "npx playwright test --config tests/release-smoke/playwright.config.ts --headed"
  },
  "devDependencies": {
    "cross-env": "^10.1.0",
--- a/scripts/docker-onboard-smoke.sh
+++ b/scripts/docker-onboard-smoke.sh
@@ -7,6 +7,8 @@ HOST_PORT="${HOST_PORT:-3131}"
 PAPERCLIPAI_VERSION="${PAPERCLIPAI_VERSION:-latest}"
 DATA_DIR="${DATA_DIR:-$REPO_ROOT/data/docker-onboard-smoke}"
 HOST_UID="${HOST_UID:-$(id -u)}"
 SMOKE_DETACH="${SMOKE_DETACH:-false}"
 SMOKE_METADATA_FILE="${SMOKE_METADATA_FILE:-}"
 PAPERCLIP_DEPLOYMENT_MODE="${PAPERCLIP_DEPLOYMENT_MODE:-authenticated}"
 PAPERCLIP_DEPLOYMENT_EXPOSURE="${PAPERCLIP_DEPLOYMENT_EXPOSURE:-private}"
 PAPERCLIP_PUBLIC_URL="${PAPERCLIP_PUBLIC_URL:-http://localhost:${HOST_PORT}}"
@@ -18,6 +20,7 @@ CONTAINER_NAME="${IMAGE_NAME//[^a-zA-Z0-9_.-]/-}"
 LOG_PID=""
 COOKIE_JAR=""
 TMP_DIR=""
 PRESERVE_CONTAINER_ON_EXIT="false"
 mkdir -p "$DATA_DIR"
@@ -25,7 +28,9 @@ cleanup() {
  if [[ -n "$LOG_PID" ]]; then
    kill "$LOG_PID" >/dev/null 2>&1 || true
  fi
-  docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
+  if [[ "$PRESERVE_CONTAINER_ON_EXIT" != "true" ]]; then
    docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
  fi
  if [[ -n "$TMP_DIR" && -d "$TMP_DIR" ]]; then
    rm -rf "$TMP_DIR"
  fi
@@ -33,6 +38,12 @@ cleanup() {
 trap cleanup EXIT INT TERM
 # Returns 0 only when `docker inspect` reports State.Running as "true" for
 # $CONTAINER_NAME. Inspect errors are silenced and count as "not running".
 container_is_running() {
  local state
  state="$(docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null || true)"
  if [[ "$state" == "true" ]]; then
    return 0
  fi
  return 1
 }
 wait_for_http() {
  local url="$1"
  local attempts="${2:-60}"
@@ -42,11 +53,36 @@ wait_for_http() {
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
    if ! container_is_running; then
      echo "Smoke bootstrap failed: container $CONTAINER_NAME exited before $url became ready" >&2
      docker logs "$CONTAINER_NAME" >&2 || true
      return 1
    fi
    sleep "$sleep_seconds"
  done
  if ! container_is_running; then
    echo "Smoke bootstrap failed: container $CONTAINER_NAME exited before readiness check completed" >&2
    docker logs "$CONTAINER_NAME" >&2 || true
  fi
  return 1
 }
 # Writes the smoke connection details to $SMOKE_METADATA_FILE as
 # NAME=value lines, one per variable, with values shell-quoted via %q so the
 # file can be sourced. Does nothing when SMOKE_METADATA_FILE is empty.
 write_metadata_file() {
  [[ -n "$SMOKE_METADATA_FILE" ]] || return 0
  mkdir -p "$(dirname "$SMOKE_METADATA_FILE")"
  # printf reuses the format for each NAME/value pair of arguments.
  printf '%s=%q\n' \
    SMOKE_BASE_URL "$PAPERCLIP_PUBLIC_URL" \
    SMOKE_ADMIN_EMAIL "$SMOKE_ADMIN_EMAIL" \
    SMOKE_ADMIN_PASSWORD "$SMOKE_ADMIN_PASSWORD" \
    SMOKE_CONTAINER_NAME "$CONTAINER_NAME" \
    SMOKE_DATA_DIR "$DATA_DIR" \
    SMOKE_IMAGE_NAME "$IMAGE_NAME" \
    SMOKE_PAPERCLIPAI_VERSION "$PAPERCLIPAI_VERSION" \
    >"$SMOKE_METADATA_FILE"
 }
 generate_bootstrap_invite_url() {
  local bootstrap_output
  local bootstrap_status
@@ -214,9 +250,12 @@ echo "==> Running onboard smoke container"
 echo "    UI should be reachable at: http://localhost:$HOST_PORT"
 echo "    Public URL: $PAPERCLIP_PUBLIC_URL"
 echo "    Smoke auto-bootstrap: $SMOKE_AUTO_BOOTSTRAP"
 echo "    Detached mode: $SMOKE_DETACH"
 echo "    Data dir: $DATA_DIR"
 echo "    Deployment: $PAPERCLIP_DEPLOYMENT_MODE/$PAPERCLIP_DEPLOYMENT_EXPOSURE"
-echo "    Live output: onboard banner and server logs stream in this terminal (Ctrl+C to stop)"
+if [[ "$SMOKE_DETACH" != "true" ]]; then
  echo "    Live output: onboard banner and server logs stream in this terminal (Ctrl+C to stop)"
 fi
 docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
@@ -231,8 +270,10 @@ docker run -d --rm \
  -v "$DATA_DIR:/paperclip" \
  "$IMAGE_NAME" >/dev/null
-docker logs -f "$CONTAINER_NAME" &
+if [[ "$SMOKE_DETACH" != "true" ]]; then
-LOG_PID=$!
+  docker logs -f "$CONTAINER_NAME" &
  LOG_PID=$!
 fi
 TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/paperclip-onboard-smoke.XXXXXX")"
 COOKIE_JAR="$TMP_DIR/cookies.txt"
@@ -246,4 +287,17 @@ if [[ "$SMOKE_AUTO_BOOTSTRAP" == "true" && "$PAPERCLIP_DEPLOYMENT_MODE" == "auth
  auto_bootstrap_authenticated_smoke
 fi
 write_metadata_file
 if [[ "$SMOKE_DETACH" == "true" ]]; then
  PRESERVE_CONTAINER_ON_EXIT="true"
  echo "==> Smoke container ready for automation"
  echo "    Smoke base URL: $PAPERCLIP_PUBLIC_URL"
  echo "    Smoke admin credentials: $SMOKE_ADMIN_EMAIL / $SMOKE_ADMIN_PASSWORD"
  if [[ -n "$SMOKE_METADATA_FILE" ]]; then
    echo "    Smoke metadata file: $SMOKE_METADATA_FILE"
  fi
  exit 0
 fi
 wait "$LOG_PID"
--- a/tests/release-smoke/docker-auth-onboarding.spec.ts
+++ b/tests/release-smoke/docker-auth-onboarding.spec.ts
@@ -0,0 +1,146 @@
 import { expect, test, type Page } from "@playwright/test";
 // Login credentials. Resolution order: the PAPERCLIP_RELEASE_SMOKE_* variable,
 // then the SMOKE_ADMIN_* variable, then the hard-coded fallback.
 const ADMIN_EMAIL =
  process.env.PAPERCLIP_RELEASE_SMOKE_EMAIL ??
  process.env.SMOKE_ADMIN_EMAIL ??
  "smoke-admin@paperclip.local";
 const ADMIN_PASSWORD =
  process.env.PAPERCLIP_RELEASE_SMOKE_PASSWORD ??
  process.env.SMOKE_ADMIN_PASSWORD ??
  "paperclip-smoke-password";
 // Values typed into the onboarding wizard and looked up again through the API.
 // The company name carries a timestamp taken when this module is loaded.
 const COMPANY_NAME = `Release-Smoke-${Date.now()}`;
 const AGENT_NAME = "CEO";
 const TASK_TITLE = "Release smoke task";
 /**
  * Opens the app root, expects to land on the auth page, submits the smoke
  * admin credentials, and waits (up to 20s) until the URL no longer matches
  * `/auth`.
  */
 async function signIn(page: Page) {
  await page.goto("/");
  await expect(page).toHaveURL(/\/auth/);
  // Locators are lazy, so naming them up front does not touch the page yet.
  const emailField = page.locator('input[type="email"]');
  const passwordField = page.locator('input[type="password"]');
  const submitButton = page.getByRole("button", { name: "Sign In" });
  await emailField.fill(ADMIN_EMAIL);
  await passwordField.fill(ADMIN_PASSWORD);
  await submitButton.click();
  await expect(page).not.toHaveURL(/\/auth/, { timeout: 20_000 });
 }
 /**
  * Waits until onboarding is reachable and makes sure the wizard's first step
  * ("Name your company") is showing.
  *
  * After sign-in the page shows either the wizard itself or a
  * "Start Onboarding" button; when the button is visible it is clicked first.
  */
 async function openOnboarding(page: Page) {
  const wizardHeading = page.locator("h3", { hasText: "Name your company" });
  const startButton = page.getByRole("button", { name: "Start Onboarding" });
  // `.first()` keeps the combined locator strict-mode safe: if the heading and
  // the button are both visible, `or()` matches two elements and the
  // visibility assertion would otherwise throw a strict mode violation.
  await expect(wizardHeading.or(startButton).first()).toBeVisible({ timeout: 20_000 });
  if (await startButton.isVisible()) {
    await startButton.click();
  }
  await expect(wizardHeading).toBeVisible({ timeout: 10_000 });
 }
 // End-to-end smoke: sign in, walk the four onboarding steps in the browser,
 // then confirm through the HTTP API that the company, agent, issue, and a
 // heartbeat run exist.
 test.describe("Docker authenticated onboarding smoke", () => {
  test("logs in, completes onboarding, and triggers the first CEO run", async ({
    page,
  }) => {
    await signIn(page);
    await openOnboarding(page);
    // Step 1: company name.
    await page.locator('input[placeholder="Acme Corp"]').fill(COMPANY_NAME);
    await page.getByRole("button", { name: "Next" }).click();
    // Step 2: first agent. The name field is expected to be pre-filled with
    // AGENT_NAME; the "Process" adapter is selected with `echo release smoke`.
    await expect(
      page.locator("h3", { hasText: "Create your first agent" })
    ).toBeVisible({ timeout: 10_000 });
    await expect(page.locator('input[placeholder="CEO"]')).toHaveValue(AGENT_NAME);
    await page.getByRole("button", { name: "Process" }).click();
    await page.locator('input[placeholder="e.g. node, python"]').fill("echo");
    await page
      .locator('input[placeholder="e.g. script.js, --flag"]')
      .fill("release smoke");
    await page.getByRole("button", { name: "Next" }).click();
    // Step 3: initial task.
    await expect(
      page.locator("h3", { hasText: "Give it something to do" })
    ).toBeVisible({ timeout: 10_000 });
    await page
      .locator('input[placeholder="e.g. Research competitor pricing"]')
      .fill(TASK_TITLE);
    await page.getByRole("button", { name: "Next" }).click();
    // Step 4: summary page should echo the entered values before launch.
    await expect(
      page.locator("h3", { hasText: "Ready to launch" })
    ).toBeVisible({ timeout: 10_000 });
    await expect(page.getByText(COMPANY_NAME)).toBeVisible();
    await expect(page.getByText(AGENT_NAME)).toBeVisible();
    await expect(page.getByText(TASK_TITLE)).toBeVisible();
    await page.getByRole("button", { name: "Create & Open Issue" }).click();
    await expect(page).toHaveURL(/\/issues\//, { timeout: 10_000 });
    // API verification. `page.request` is used for these calls and the origin
    // is taken from the current page URL.
    const baseUrl = new URL(page.url()).origin;
    const companiesRes = await page.request.get(`${baseUrl}/api/companies`);
    expect(companiesRes.ok()).toBe(true);
    const companies = (await companiesRes.json()) as Array<{ id: string; name: string }>;
    const company = companies.find((entry) => entry.name === COMPANY_NAME);
    expect(company).toBeTruthy();
    const agentsRes = await page.request.get(
      `${baseUrl}/api/companies/${company!.id}/agents`
    );
    expect(agentsRes.ok()).toBe(true);
    const agents = (await agentsRes.json()) as Array<{
      id: string;
      name: string;
      role: string;
      adapterType: string;
    }>;
    const ceoAgent = agents.find((entry) => entry.name === AGENT_NAME);
    expect(ceoAgent).toBeTruthy();
    expect(ceoAgent!.role).toBe("ceo");
    expect(ceoAgent!.adapterType).toBe("process");
    const issuesRes = await page.request.get(
      `${baseUrl}/api/companies/${company!.id}/issues`
    );
    expect(issuesRes.ok()).toBe(true);
    const issues = (await issuesRes.json()) as Array<{
      id: string;
      title: string;
      assigneeAgentId: string | null;
    }>;
    const issue = issues.find((entry) => entry.title === TASK_TITLE);
    expect(issue).toBeTruthy();
    expect(issue!.assigneeAgentId).toBe(ceoAgent!.id);
    // Poll the heartbeat-runs endpoint for up to 30s until a run for the CEO
    // agent shows up with an accepted status.
    await expect.poll(
      async () => {
        const runsRes = await page.request.get(
          `${baseUrl}/api/companies/${company!.id}/heartbeat-runs?agentId=${ceoAgent!.id}`
        );
        expect(runsRes.ok()).toBe(true);
        const runs = (await runsRes.json()) as Array<{
          agentId: string;
          invocationSource: string;
          status: string;
        }>;
        // NOTE(review): `find` returns the first matching entry, so this is
        // the latest run only if the API lists runs newest-first — confirm.
        const latestRun = runs.find((entry) => entry.agentId === ceoAgent!.id);
        return latestRun
          ? {
              invocationSource: latestRun.invocationSource,
              status: latestRun.status,
            }
          : null;
      },
      {
        timeout: 30_000,
        intervals: [1_000, 2_000, 5_000],
      }
    ).toEqual(
      expect.objectContaining({
        invocationSource: "assignment",
        // A quick run may already have finished, so any of these is accepted.
        status: expect.stringMatching(/^(queued|running|succeeded)$/),
      })
    );
  });
 });
--- a/tests/release-smoke/playwright.config.ts
+++ b/tests/release-smoke/playwright.config.ts
@@ -0,0 +1,28 @@
 import { defineConfig } from "@playwright/test";
 // Target server URL; falls back to the local address on port 3232 when
 // PAPERCLIP_RELEASE_SMOKE_BASE_URL is not set.
 const BASE_URL =
  process.env.PAPERCLIP_RELEASE_SMOKE_BASE_URL ?? "http://127.0.0.1:3232";
 // Playwright configuration for the release-smoke suite. It declares no
 // `webServer`, so the server at BASE_URL must already be running.
 export default defineConfig({
  testDir: ".",
  testMatch: "**/*.spec.ts",
  // Per-test timeout and default timeout for `expect` assertions.
  timeout: 90_000,
  expect: {
    timeout: 15_000,
  },
  // One retry when the CI environment variable is set, none otherwise.
  retries: process.env.CI ? 1 : 0,
  use: {
    baseURL: BASE_URL,
    headless: true,
    // Screenshots and traces are kept only for failing tests.
    screenshot: "only-on-failure",
    trace: "retain-on-failure",
  },
  projects: [
    {
      name: "chromium",
      use: { browserName: "chromium" },
    },
  ],
  // Output locations, relative to this config file's directory.
  outputDir: "./test-results",
  reporter: [["list"], ["html", { open: "never", outputFolder: "./playwright-report" }]],
 });