feat: add release smoke workflow

2026-03-18 07:59:32 -05:00
parent 3e0e15394a
commit 19f4a78f4a
9 changed files with 795 additions and 8 deletions
--- a/.github/workflows/release-smoke.yml
+++ b/.github/workflows/release-smoke.yml
@@ -0,0 +1,118 @@
+# Boots a published Paperclip dist-tag in Docker and runs the release-smoke
+# Playwright suite against it. Can be started manually or called from
+# another workflow.
+name: Release Smoke
+
+on:
+  # Manual runs: the dist-tag is restricted to the listed choices.
+  workflow_dispatch:
+    inputs:
+      paperclip_version:
+        description: Published Paperclip dist-tag to test
+        required: true
+        default: canary
+        type: choice
+        options:
+          - canary
+          - latest
+      host_port:
+        description: Host port for the Docker smoke container
+        required: false
+        default: "3232"
+        type: string
+      artifact_name:
+        description: Artifact name for uploaded diagnostics
+        required: false
+        default: release-smoke
+        type: string
+  # Reusable entry point: same inputs, but paperclip_version is a free-form
+  # string here (no choice list), so callers decide which tag to pass.
+  workflow_call:
+    inputs:
+      paperclip_version:
+        required: true
+        type: string
+      host_port:
+        required: false
+        default: "3232"
+        type: string
+      artifact_name:
+        required: false
+        default: release-smoke
+        type: string
+
+jobs:
+  # Single job: prepare the toolchain, launch the Docker harness, run the
+  # browser suite, then collect diagnostics and tear the container down.
+  smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9.15.4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 24
+          cache: pnpm
+
+      # NOTE(review): --no-frozen-lockfile presumably lets the install proceed
+      # when the lockfile is out of date; confirm this is intended for CI.
+      - name: Install dependencies
+        run: pnpm install --no-frozen-lockfile
+
+      # Only Chromium is installed; the release-smoke Playwright config
+      # declares a single chromium project.
+      - name: Install Playwright browser
+        run: npx playwright install --with-deps chromium
+
+      - name: Launch Docker smoke harness
+        # Workflow inputs are handed to the script through `env` instead of
+        # being interpolated into the script text. workflow_call accepts any
+        # string for these inputs, so expanding `${{ ... }}` inside `run` would
+        # let a crafted value inject shell commands; as environment variables
+        # they are treated purely as data.
+        env:
+          HOST_PORT: ${{ inputs.host_port }}
+          PAPERCLIPAI_VERSION: ${{ inputs.paperclip_version }}
+        run: |
+          metadata_file="$RUNNER_TEMP/release-smoke.env"
+          # Detached mode: the harness starts the container, writes its
+          # connection metadata to $metadata_file, and exits while leaving the
+          # container running. HOST_PORT and PAPERCLIPAI_VERSION are inherited
+          # from the step environment.
+          DATA_DIR="$RUNNER_TEMP/release-smoke-data" \
+          SMOKE_DETACH=true \
+          SMOKE_METADATA_FILE="$metadata_file" \
+          ./scripts/docker-onboard-smoke.sh
+          # The metadata file holds shell-quoted SMOKE_* assignments; source it
+          # with auto-export enabled to load them into this shell.
+          set -a
+          source "$metadata_file"
+          set +a
+          # Publish the values to later steps (Playwright run, log capture,
+          # artifact upload, container cleanup).
+          {
+            echo "SMOKE_BASE_URL=$SMOKE_BASE_URL"
+            echo "SMOKE_ADMIN_EMAIL=$SMOKE_ADMIN_EMAIL"
+            echo "SMOKE_ADMIN_PASSWORD=$SMOKE_ADMIN_PASSWORD"
+            echo "SMOKE_CONTAINER_NAME=$SMOKE_CONTAINER_NAME"
+            echo "SMOKE_DATA_DIR=$SMOKE_DATA_DIR"
+            echo "SMOKE_IMAGE_NAME=$SMOKE_IMAGE_NAME"
+            echo "SMOKE_PAPERCLIPAI_VERSION=$SMOKE_PAPERCLIPAI_VERSION"
+            echo "SMOKE_METADATA_FILE=$metadata_file"
+          } >> "$GITHUB_ENV"
+
+      - name: Run release smoke Playwright suite
+        # Translate the harness metadata exported by the launch step into the
+        # PAPERCLIP_RELEASE_SMOKE_* variables read by the Playwright config
+        # and spec.
+        env:
+          PAPERCLIP_RELEASE_SMOKE_BASE_URL: ${{ env.SMOKE_BASE_URL }}
+          PAPERCLIP_RELEASE_SMOKE_EMAIL: ${{ env.SMOKE_ADMIN_EMAIL }}
+          PAPERCLIP_RELEASE_SMOKE_PASSWORD: ${{ env.SMOKE_ADMIN_PASSWORD }}
+        run: pnpm run test:release-smoke
+
+      # The remaining steps run with `if: always()` so diagnostics and cleanup
+      # still happen when an earlier step fails.
+      # NOTE(review): SMOKE_CONTAINER_NAME is only exported at the end of the
+      # launch step, so if the harness itself fails these guards presumably
+      # skip log capture and removal; confirm that is acceptable.
+      - name: Capture Docker logs
+        if: always()
+        run: |
+          if [[ -n "${SMOKE_CONTAINER_NAME:-}" ]]; then
+            # Best effort: a missing container must not fail the job.
+            docker logs "$SMOKE_CONTAINER_NAME" >"$RUNNER_TEMP/docker-onboard-smoke.log" 2>&1 || true
+          fi
+
+      # Bundles the container log, the harness metadata file, and the
+      # Playwright report/result directories into one artifact.
+      - name: Upload diagnostics
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.artifact_name }}
+          path: |
+            ${{ runner.temp }}/docker-onboard-smoke.log
+            ${{ env.SMOKE_METADATA_FILE }}
+            tests/release-smoke/playwright-report/
+            tests/release-smoke/test-results/
+          retention-days: 14
+
+      # Force-remove the detached container the harness left running.
+      - name: Stop Docker smoke container
+        if: always()
+        run: |
+          if [[ -n "${SMOKE_CONTAINER_NAME:-}" ]]; then
+            docker rm -f "$SMOKE_CONTAINER_NAME" >/dev/null 2>&1 || true
+          fi
--- a/.gitignore
+++ b/.gitignore
@@ -46,5 +46,7 @@ tmp/
 # Playwright
 tests/e2e/test-results/
 tests/e2e/playwright-report/
+tests/release-smoke/test-results/
+tests/release-smoke/playwright-report/
 .superset/
 .claude/worktrees/
--- a/doc/DOCKER.md
+++ b/doc/DOCKER.md
@@ -120,6 +120,7 @@ Useful overrides:
 ```sh
 HOST_PORT=3200 PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 PAPERCLIP_DEPLOYMENT_MODE=authenticated PAPERCLIP_DEPLOYMENT_EXPOSURE=private ./scripts/docker-onboard-smoke.sh
+SMOKE_DETACH=true SMOKE_METADATA_FILE=/tmp/paperclip-smoke.env PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 ```

 Notes:
@@ -131,4 +132,5 @@ Notes:
 - Smoke script also defaults `PAPERCLIP_PUBLIC_URL` to `http://localhost:<HOST_PORT>` so bootstrap invite URLs and auth callbacks use the reachable host port instead of the container's internal `3100`.
 - In authenticated mode, the smoke script defaults `SMOKE_AUTO_BOOTSTRAP=true` and drives the real bootstrap path automatically: it signs up a real user, runs `paperclipai auth bootstrap-ceo` inside the container to mint a real bootstrap invite, accepts that invite over HTTP, and verifies board session access.
 - Run the script in the foreground to watch the onboarding flow; stop with `Ctrl+C` after validation.
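+- Set `SMOKE_DETACH=true` to have the script exit once the container is ready, leaving it running for automation. Set `SMOKE_METADATA_FILE` to a path to also write shell-sourceable connection metadata (base URL, admin credentials, container name) to that file.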
 - The image definition is in `Dockerfile.onboard-smoke`.
--- a/doc/RELEASING.md
+++ b/doc/RELEASING.md
@@ -69,6 +69,8 @@ Users install canaries with:

 ```bash
 npx paperclipai@canary onboard
+# or, to onboard into a throwaway data directory:
+npx paperclipai@canary onboard --data-dir "$(mktemp -d /tmp/paperclip-canary.XXXXXX)"
 ```

 ### Stable
@@ -165,13 +167,22 @@ HOST_PORT=3232 DATA_DIR=./data/release-smoke-canary PAPERCLIPAI_VERSION=canary .
 HOST_PORT=3233 DATA_DIR=./data/release-smoke-stable PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
 ```

+Automated browser smoke is also available:
+
+```bash
+gh workflow run release-smoke.yml -f paperclip_version=canary
+gh workflow run release-smoke.yml -f paperclip_version=latest
+```
+
 Minimum checks:

 - `npx paperclipai@canary onboard` installs
 - onboarding completes without crashes
- the server boots
- the UI loads
- basic company creation and dashboard load work
+- authenticated login works with the smoke credentials
+- the browser lands in onboarding on a fresh instance
+- company creation succeeds
+- the first CEO agent is created
+- the first CEO heartbeat run is triggered

 ## Rollback

--- a/doc/plans/2026-03-17-docker-release-browser-e2e.md
+++ b/doc/plans/2026-03-17-docker-release-browser-e2e.md
@@ -0,0 +1,424 @@
+# Docker Release Browser E2E Plan
+
+## Context
+
+Today release smoke testing for published Paperclip packages is manual and shell-driven:
+
+```sh
+HOST_PORT=3232 DATA_DIR=./data/release-smoke-canary PAPERCLIPAI_VERSION=canary ./scripts/docker-onboard-smoke.sh
+HOST_PORT=3233 DATA_DIR=./data/release-smoke-stable PAPERCLIPAI_VERSION=latest ./scripts/docker-onboard-smoke.sh
+```
+
+That is useful because it exercises the same public install surface users hit:
+
+- Docker
+- `npx paperclipai@canary`
+- `npx paperclipai@latest`
+- authenticated bootstrap flow
+
+But it still leaves the most important release questions to a human with a browser:
+
+- can I sign in with the smoke credentials?
+- do I land in onboarding?
+- can I complete onboarding?
+- does the initial CEO agent actually get created and run?
+
+The repo already has two adjacent pieces:
+
+- `tests/e2e/onboarding.spec.ts` covers the onboarding wizard against the local source tree
+- `scripts/docker-onboard-smoke.sh` boots a published Docker install and auto-bootstraps authenticated mode, but only verifies the API/session layer
+
+What is missing is one deterministic browser test that joins those two paths.
+
+## Goal
+
+Add a release-grade Docker-backed browser E2E that validates the published `canary` and `latest` installs end to end:
+
+1. boot the published package in Docker
+2. sign in with known smoke credentials
+3. verify the user is routed into onboarding
+4. complete onboarding in the browser
+5. verify the first CEO agent exists
+6. verify the initial CEO run was triggered and reached a terminal or active state
+
+Then wire that test into GitHub Actions so release validation is no longer manual-only.
+
+## Recommendation In One Sentence
+
+Turn the current Docker smoke script into a machine-friendly test harness, add a dedicated Playwright release-smoke spec that drives the authenticated browser flow against published Docker installs, and run it in GitHub Actions for both `canary` and `latest`.
+
+## What We Have Today
+
+### Existing local browser coverage
+
+`tests/e2e/onboarding.spec.ts` already proves the onboarding wizard can:
+
+- create a company
+- create a CEO agent
+- create an initial issue
+- optionally observe task progress
+
+That is a good base, but it does not validate the public npm package, Docker path, authenticated login flow, or release dist-tags.
+
+### Existing Docker smoke coverage
+
+`scripts/docker-onboard-smoke.sh` already does useful setup work:
+
+- builds `Dockerfile.onboard-smoke`
+- runs `paperclipai@${PAPERCLIPAI_VERSION}` inside Docker
+- waits for health
+- signs up or signs in a smoke admin user
+- generates and accepts the bootstrap CEO invite in authenticated mode
+- verifies a board session and `/api/companies`
+
+That means the hard bootstrap problem is mostly solved already. The main gap is that the script is human-oriented and never hands control to a browser test.
+
+### Existing CI shape
+
+The repo already has:
+
+- `.github/workflows/e2e.yml` for manual Playwright runs against local source
+- `.github/workflows/release.yml` for canary publish on `master` and manual stable promotion
+
+So the right move is to extend the current test/release system, not create a parallel one.
+
+## Product Decision
+
+### 1. The release smoke should stay deterministic and token-free
+
+The first version should not require OpenAI, Anthropic, or external agent credentials.
+
+Use the onboarding flow with a deterministic adapter that can run on a stock GitHub runner and inside the published Docker install. The existing `process` adapter with a trivial command is the right base path for this release gate.
+
+That keeps this test focused on:
+
+- release packaging
+- auth/bootstrap
+- UI routing
+- onboarding contract
+- agent creation
+- heartbeat invocation plumbing
+
+Later we can add a second credentialed smoke lane for real model-backed agents.
+
+### 2. Smoke credentials become an explicit test contract
+
+The current defaults in `scripts/docker-onboard-smoke.sh` should be treated as stable test fixtures:
+
+- email: `smoke-admin@paperclip.local`
+- password: `paperclip-smoke-password`
+
+The browser test should log in with those exact values unless overridden by env vars.
+
+### 3. Published-package smoke and source-tree E2E stay separate
+
+Keep two lanes:
+
+- source-tree E2E for feature development
+- published Docker release smoke for release confidence
+
+They overlap on onboarding assertions, but they guard different failure classes.
+
+## Proposed Design
+
+## 1. Add a CI-friendly Docker smoke harness
+
+Refactor `scripts/docker-onboard-smoke.sh` so it can run in two modes:
+
+- interactive mode
+  - current behavior
+  - streams logs and waits in foreground for manual inspection
+- CI mode
+  - starts the container
+  - waits for health and authenticated bootstrap
+  - prints machine-readable metadata
+  - exits while leaving the container running for Playwright
+
+Recommended shape:
+
+- keep `scripts/docker-onboard-smoke.sh` as the public entry point
+- add a `SMOKE_DETACH=true` or `--detach` mode
+- emit a JSON blob or `.env` file containing:
+  - `SMOKE_BASE_URL`
+  - `SMOKE_ADMIN_EMAIL`
+  - `SMOKE_ADMIN_PASSWORD`
+  - `SMOKE_CONTAINER_NAME`
+  - `SMOKE_DATA_DIR`
+
+The workflow and Playwright tests can then consume the emitted metadata instead of scraping logs.
+
+### Why this matters
+
+The current script always tails logs and then blocks on `wait "$LOG_PID"`. That is convenient for manual smoke testing, but it is the wrong shape for CI orchestration.
+
+## 2. Add a dedicated Playwright release-smoke spec
+
+Create a second Playwright entry point specifically for published Docker installs, for example:
+
+- `tests/release-smoke/playwright.config.ts`
+- `tests/release-smoke/docker-auth-onboarding.spec.ts`
+
+This suite should not use Playwright `webServer`, because the app server will already be running inside Docker.
+
+### Browser scenario
+
+The first release-smoke scenario should validate:
+
+1. open `/`
+2. unauthenticated user is redirected to `/auth`
+3. sign in using the smoke credentials
+4. authenticated user lands on onboarding when no companies exist
+5. onboarding wizard appears with the expected step labels
+6. create a company
+7. create the first agent using `process`
+8. create the initial issue
+9. finish onboarding and open the created issue
+10. verify via API:
+    - company exists
+    - CEO agent exists
+    - issue exists and is assigned to the CEO
+11. verify the first heartbeat run was triggered:
+    - either by checking issue status changed from initial state, or
+    - by checking agent/runs API shows a run for the CEO, or
+    - both
+
+The test should tolerate the run completing quickly. For this reason, the assertion should accept:
+
+- `queued`
+- `running`
+- `succeeded`
+
+and similarly for issue progression if the issue status changes before the assertion runs.
+
+### Why a separate spec instead of reusing `tests/e2e/onboarding.spec.ts`
+
+The local-source test and release-smoke test have different assumptions:
+
+- different server lifecycle
+- different auth path
+- different deployment mode
+- published npm package instead of local workspace code
+
+Trying to force both through one spec will make both worse.
+
+## 3. Add a release-smoke workflow in GitHub Actions
+
+Add a workflow dedicated to this surface, ideally reusable:
+
+- `.github/workflows/release-smoke.yml`
+
+Recommended triggers:
+
+- `workflow_dispatch`
+- `workflow_call`
+
+Recommended inputs:
+
+- `paperclip_version`
+  - `canary` or `latest`
+- `host_port`
+  - optional, default runner-safe port
+- `artifact_name`
+  - optional for clearer uploads
+
+### Job outline
+
+1. checkout repo
+2. install Node/pnpm
+3. install Playwright browser dependencies
+4. launch Docker smoke harness in detached mode with the chosen dist-tag
+5. run the release-smoke Playwright suite against the returned base URL
+6. always collect diagnostics:
+   - Playwright report
+   - screenshots
+   - trace
+   - `docker logs`
+   - harness metadata file
+7. stop and remove container
+
+### Why a reusable workflow
+
+This lets us:
+
+- run the smoke manually on demand
+- call it from `release.yml`
+- reuse the same job for both `canary` and `latest`
+
+## 4. Integrate it into release automation incrementally
+
+### Phase A: Manual workflow only
+
+First ship the workflow as manual-only so the harness and test can be stabilized without blocking releases.
+
+### Phase B: Run automatically after canary publish
+
+After `publish_canary` succeeds in `.github/workflows/release.yml`, call the reusable release-smoke workflow with:
+
+- `paperclip_version=canary`
+
+This proves the just-published public canary really boots and onboards.
+
+### Phase C: Run automatically after stable publish
+
+After `publish_stable` succeeds, call the same workflow with:
+
+- `paperclip_version=latest`
+
+This gives us post-publish confirmation that the stable dist-tag is healthy.
+
+### Important nuance
+
+Testing `latest` from npm cannot happen before stable publish, because the package under test does not exist under `latest` yet. So the `latest` smoke is a post-publish verification, not a pre-publish gate.
+
+If we later want a true pre-publish stable gate, that should be a separate source-ref or locally built package smoke job.
+
+## 5. Make diagnostics first-class
+
+This workflow is only valuable if failures are fast to debug.
+
+Always capture:
+
+- Playwright HTML report
+- Playwright trace on failure
+- final screenshot on failure
+- full `docker logs` output
+- emitted smoke metadata
+- optional `curl /api/health` snapshot
+
+Without that, the test will become a flaky black box and people will stop trusting it.
+
+## Implementation Plan
+
+## Phase 1: Harness refactor
+
+Files:
+
+- `scripts/docker-onboard-smoke.sh`
+- optionally `scripts/lib/docker-onboard-smoke.sh` or similar helper
+- `doc/DOCKER.md`
+- `doc/RELEASING.md`
+
+Tasks:
+
+1. Add detached/CI mode to the Docker smoke script.
+2. Make the script emit machine-readable connection metadata.
+3. Keep the current interactive manual mode intact.
+4. Add reliable cleanup commands for CI.
+
+Acceptance:
+
+- a script invocation can start the published Docker app, auto-bootstrap it, and return control to the caller with enough metadata for browser automation
+
+## Phase 2: Browser release-smoke suite
+
+Files:
+
+- `tests/release-smoke/playwright.config.ts`
+- `tests/release-smoke/docker-auth-onboarding.spec.ts`
+- root `package.json`
+
+Tasks:
+
+1. Add a dedicated Playwright config for external server testing.
+2. Implement login + onboarding + CEO creation flow.
+3. Assert a CEO run was created or completed.
+4. Add a root script such as:
+   - `test:release-smoke`
+
+Acceptance:
+
+- the suite passes locally against both:
+  - `PAPERCLIPAI_VERSION=canary`
+  - `PAPERCLIPAI_VERSION=latest`
+
+## Phase 3: GitHub Actions workflow
+
+Files:
+
+- `.github/workflows/release-smoke.yml`
+
+Tasks:
+
+1. Add manual and reusable workflow entry points.
+2. Install Chromium and runner dependencies.
+3. Start Docker smoke in detached mode.
+4. Run the release-smoke Playwright suite.
+5. Upload diagnostics artifacts.
+
+Acceptance:
+
+- a maintainer can run the workflow manually for either `canary` or `latest`
+
+## Phase 4: Release workflow integration
+
+Files:
+
+- `.github/workflows/release.yml`
+- `doc/RELEASING.md`
+
+Tasks:
+
+1. Trigger release smoke automatically after canary publish.
+2. Trigger release smoke automatically after stable publish.
+3. Document expected behavior and failure handling.
+
+Acceptance:
+
+- canary releases automatically produce a published-package browser smoke result
+- stable releases automatically produce a `latest` browser smoke result
+
+## Phase 5: Future extension for real model-backed agent validation
+
+Not part of the first implementation, but this should be the next layer after the deterministic lane is stable.
+
+Possible additions:
+
+- a second Playwright project gated on repo secrets
+- real `claude_local` or `codex_local` adapter validation in Docker-capable environments
+- assertion that the CEO posts a real task/comment artifact
+- stable release holdback until the credentialed lane passes
+
+This should stay optional until the token-free lane is trustworthy.
+
+## Acceptance Criteria
+
+The plan is complete when the implemented system can demonstrate all of the following:
+
+1. A published `paperclipai@canary` Docker install can be smoke-tested by Playwright in CI.
+2. A published `paperclipai@latest` Docker install can be smoke-tested by Playwright in CI.
+3. The test logs into authenticated mode with the smoke credentials.
+4. The test sees onboarding for a fresh instance.
+5. The test completes onboarding in the browser.
+6. The test verifies the initial CEO agent was created.
+7. The test verifies at least one CEO heartbeat run was triggered.
+8. Failures produce actionable artifacts rather than just a red job.
+
+## Risks And Decisions To Make
+
+### 1. Fast process runs may finish before the UI visibly updates
+
+That is expected. The assertions should prefer API polling for run existence/status rather than only visual indicators.
+
+### 2. `latest` smoke is post-publish, not preventive
+
+This is a real limitation of testing the published dist-tag itself. It is still valuable, but it should not be confused with a pre-publish gate.
+
+### 3. We should not overcouple the test to cosmetic onboarding text
+
+The important contract is flow success, created entities, and run creation. Use visible labels sparingly and prefer stable semantic selectors where possible.
+
+### 4. Keep the smoke adapter path boring
+
+For release safety, the first test should use the most boring runnable adapter possible. This is not the place to validate every adapter.
+
+## Recommended First Slice
+
+If we want the fastest path to value, ship this in order:
+
+1. add detached mode to `scripts/docker-onboard-smoke.sh`
+2. add one Playwright spec for authenticated login + onboarding + CEO run verification
+3. add manual `release-smoke.yml`
+4. once stable, wire canary into `release.yml`
+5. after that, wire stable `latest` smoke into `release.yml`
+
+That gives release confidence quickly without turning the first version into a large CI redesign.
--- a/package.json
+++ b/package.json
@@ -29,7 +29,9 @@
    "smoke:openclaw-docker-ui": "./scripts/smoke/openclaw-docker-ui.sh",
    "smoke:openclaw-sse-standalone": "./scripts/smoke/openclaw-sse-standalone.sh",
    "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts",
-    "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed"
+    "test:e2e:headed": "npx playwright test --config tests/e2e/playwright.config.ts --headed",
+    "test:release-smoke": "npx playwright test --config tests/release-smoke/playwright.config.ts",
+    "test:release-smoke:headed": "npx playwright test --config tests/release-smoke/playwright.config.ts --headed"
  },
  "devDependencies": {
    "cross-env": "^10.1.0",
--- a/scripts/docker-onboard-smoke.sh
+++ b/scripts/docker-onboard-smoke.sh
@@ -7,6 +7,8 @@ HOST_PORT="${HOST_PORT:-3131}"
 PAPERCLIPAI_VERSION="${PAPERCLIPAI_VERSION:-latest}"
 DATA_DIR="${DATA_DIR:-$REPO_ROOT/data/docker-onboard-smoke}"
 HOST_UID="${HOST_UID:-$(id -u)}"
+SMOKE_DETACH="${SMOKE_DETACH:-false}"
+SMOKE_METADATA_FILE="${SMOKE_METADATA_FILE:-}"
 PAPERCLIP_DEPLOYMENT_MODE="${PAPERCLIP_DEPLOYMENT_MODE:-authenticated}"
 PAPERCLIP_DEPLOYMENT_EXPOSURE="${PAPERCLIP_DEPLOYMENT_EXPOSURE:-private}"
 PAPERCLIP_PUBLIC_URL="${PAPERCLIP_PUBLIC_URL:-http://localhost:${HOST_PORT}}"
@@ -18,6 +20,7 @@ CONTAINER_NAME="${IMAGE_NAME//[^a-zA-Z0-9_.-]/-}"
 LOG_PID=""
 COOKIE_JAR=""
 TMP_DIR=""
+PRESERVE_CONTAINER_ON_EXIT="false"

 mkdir -p "$DATA_DIR"

@@ -25,7 +28,9 @@ cleanup() {
  if [[ -n "$LOG_PID" ]]; then
    kill "$LOG_PID" >/dev/null 2>&1 || true
  fi
+  if [[ "$PRESERVE_CONTAINER_ON_EXIT" != "true" ]]; then
    docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
+  fi
  if [[ -n "$TMP_DIR" && -d "$TMP_DIR" ]]; then
    rm -rf "$TMP_DIR"
  fi
@@ -33,6 +38,12 @@ cleanup() {

 trap cleanup EXIT INT TERM

+container_is_running() {
+  local running
+  running="$(docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null || true)"
+  [[ "$running" == "true" ]]
+}
+
 wait_for_http() {
  local url="$1"
  local attempts="${2:-60}"
@@ -42,11 +53,36 @@ wait_for_http() {
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
+    if ! container_is_running; then
+      echo "Smoke bootstrap failed: container $CONTAINER_NAME exited before $url became ready" >&2
+      docker logs "$CONTAINER_NAME" >&2 || true
+      return 1
+    fi
    sleep "$sleep_seconds"
  done
+  if ! container_is_running; then
+    echo "Smoke bootstrap failed: container $CONTAINER_NAME exited before readiness check completed" >&2
+    docker logs "$CONTAINER_NAME" >&2 || true
+  fi
  return 1
 }

+write_metadata_file() {
+  if [[ -z "$SMOKE_METADATA_FILE" ]]; then
+    return 0
+  fi
+  mkdir -p "$(dirname "$SMOKE_METADATA_FILE")"
+  {
+    printf 'SMOKE_BASE_URL=%q\n' "$PAPERCLIP_PUBLIC_URL"
+    printf 'SMOKE_ADMIN_EMAIL=%q\n' "$SMOKE_ADMIN_EMAIL"
+    printf 'SMOKE_ADMIN_PASSWORD=%q\n' "$SMOKE_ADMIN_PASSWORD"
+    printf 'SMOKE_CONTAINER_NAME=%q\n' "$CONTAINER_NAME"
+    printf 'SMOKE_DATA_DIR=%q\n' "$DATA_DIR"
+    printf 'SMOKE_IMAGE_NAME=%q\n' "$IMAGE_NAME"
+    printf 'SMOKE_PAPERCLIPAI_VERSION=%q\n' "$PAPERCLIPAI_VERSION"
+  } >"$SMOKE_METADATA_FILE"
+}
+
 generate_bootstrap_invite_url() {
  local bootstrap_output
  local bootstrap_status
@@ -214,9 +250,12 @@ echo "==> Running onboard smoke container"
 echo "    UI should be reachable at: http://localhost:$HOST_PORT"
 echo "    Public URL: $PAPERCLIP_PUBLIC_URL"
 echo "    Smoke auto-bootstrap: $SMOKE_AUTO_BOOTSTRAP"
+echo "    Detached mode: $SMOKE_DETACH"
 echo "    Data dir: $DATA_DIR"
 echo "    Deployment: $PAPERCLIP_DEPLOYMENT_MODE/$PAPERCLIP_DEPLOYMENT_EXPOSURE"
+if [[ "$SMOKE_DETACH" != "true" ]]; then
  echo "    Live output: onboard banner and server logs stream in this terminal (Ctrl+C to stop)"
+fi

 docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true

@@ -231,8 +270,10 @@ docker run -d --rm \
  -v "$DATA_DIR:/paperclip" \
  "$IMAGE_NAME" >/dev/null

+if [[ "$SMOKE_DETACH" != "true" ]]; then
  docker logs -f "$CONTAINER_NAME" &
  LOG_PID=$!
+fi

 TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/paperclip-onboard-smoke.XXXXXX")"
 COOKIE_JAR="$TMP_DIR/cookies.txt"
@@ -246,4 +287,17 @@ if [[ "$SMOKE_AUTO_BOOTSTRAP" == "true" && "$PAPERCLIP_DEPLOYMENT_MODE" == "auth
  auto_bootstrap_authenticated_smoke
 fi

+# Emit connection metadata (no-op unless SMOKE_METADATA_FILE is set). This runs
+# in both interactive and detached mode.
+write_metadata_file
+
+# Detached mode: hand control back to the caller with the container still up.
+if [[ "$SMOKE_DETACH" == "true" ]]; then
+  # Must be set before exiting so the EXIT trap's cleanup skips `docker stop`.
+  PRESERVE_CONTAINER_ON_EXIT="true"
+  echo "==> Smoke container ready for automation"
+  echo "    Smoke base URL: $PAPERCLIP_PUBLIC_URL"
+  # NOTE(review): this prints the admin password to stdout; presumably fine
+  # for the fixed smoke fixture, but confirm for overridden credentials.
+  echo "    Smoke admin credentials: $SMOKE_ADMIN_EMAIL / $SMOKE_ADMIN_PASSWORD"
+  if [[ -n "$SMOKE_METADATA_FILE" ]]; then
+    echo "    Smoke metadata file: $SMOKE_METADATA_FILE"
+  fi
+  exit 0
+fi
+
 wait "$LOG_PID"
--- a/tests/release-smoke/docker-auth-onboarding.spec.ts
+++ b/tests/release-smoke/docker-auth-onboarding.spec.ts
@@ -0,0 +1,146 @@
+import { expect, test, type Page } from "@playwright/test";
+
+// Smoke admin credentials. Resolution order: the PAPERCLIP_RELEASE_SMOKE_*
+// variable, then the harness's SMOKE_ADMIN_* variable, then a built-in default.
+const ADMIN_EMAIL =
+  process.env.PAPERCLIP_RELEASE_SMOKE_EMAIL ??
+  process.env.SMOKE_ADMIN_EMAIL ??
+  "smoke-admin@paperclip.local";
+const ADMIN_PASSWORD =
+  process.env.PAPERCLIP_RELEASE_SMOKE_PASSWORD ??
+  process.env.SMOKE_ADMIN_PASSWORD ??
+  "paperclip-smoke-password";
+
+// Timestamp suffix gives each test process a distinct company name.
+const COMPANY_NAME = `Release-Smoke-${Date.now()}`;
+// Expected to match the value pre-filled in the onboarding agent-name field.
+const AGENT_NAME = "CEO";
+const TASK_TITLE = "Release smoke task";
+
+/**
+ * Signs in through the auth page with the smoke admin credentials.
+ *
+ * Navigates to the app root, expects to be sent to an `/auth` URL, submits
+ * the email/password form, and waits (up to 20s) until the URL no longer
+ * matches `/auth`.
+ */
+async function signIn(page: Page) {
+  const authRoute = /\/auth/;
+  const emailField = page.locator('input[type="email"]');
+  const passwordField = page.locator('input[type="password"]');
+  const submitButton = page.getByRole("button", { name: "Sign In" });
+
+  await page.goto("/");
+  await expect(page).toHaveURL(authRoute);
+
+  await emailField.fill(ADMIN_EMAIL);
+  await passwordField.fill(ADMIN_PASSWORD);
+  await submitButton.click();
+
+  await expect(page).not.toHaveURL(authRoute, { timeout: 20_000 });
+}
+
+/**
+ * Brings the onboarding wizard's first step ("Name your company") into view.
+ *
+ * The page may show either the wizard itself or a "Start Onboarding" button;
+ * when the button is visible it is clicked before asserting the wizard
+ * heading.
+ */
+async function openOnboarding(page: Page) {
+  const startButton = page.getByRole("button", { name: "Start Onboarding" });
+  const wizardHeading = page.locator("h3", { hasText: "Name your company" });
+  const eitherEntryPoint = wizardHeading.or(startButton);
+
+  await expect(eitherEntryPoint).toBeVisible({ timeout: 20_000 });
+
+  const needsManualStart = await startButton.isVisible();
+  if (needsManualStart) {
+    await startButton.click();
+  }
+
+  await expect(wizardHeading).toBeVisible({ timeout: 10_000 });
+}
+
+// End-to-end smoke scenario: sign in, walk the four onboarding steps in the
+// browser, then confirm through the HTTP API that the company, CEO agent,
+// issue, and a heartbeat run for the CEO exist.
+test.describe("Docker authenticated onboarding smoke", () => {
+  test("logs in, completes onboarding, and triggers the first CEO run", async ({
+    page,
+  }) => {
+    await signIn(page);
+    await openOnboarding(page);
+
+    // Step 1: name the company.
+    await page.locator('input[placeholder="Acme Corp"]').fill(COMPANY_NAME);
+    await page.getByRole("button", { name: "Next" }).click();
+
+    // Step 2: configure the first agent with the process adapter running
+    // `echo release smoke`, so no external credentials are needed.
+    await expect(
+      page.locator("h3", { hasText: "Create your first agent" })
+    ).toBeVisible({ timeout: 10_000 });
+
+    // The agent name field is expected to already contain "CEO".
+    await expect(page.locator('input[placeholder="CEO"]')).toHaveValue(AGENT_NAME);
+    await page.getByRole("button", { name: "Process" }).click();
+    await page.locator('input[placeholder="e.g. node, python"]').fill("echo");
+    await page
+      .locator('input[placeholder="e.g. script.js, --flag"]')
+      .fill("release smoke");
+    await page.getByRole("button", { name: "Next" }).click();
+
+    // Step 3: give the agent its initial task.
+    await expect(
+      page.locator("h3", { hasText: "Give it something to do" })
+    ).toBeVisible({ timeout: 10_000 });
+    await page
+      .locator('input[placeholder="e.g. Research competitor pricing"]')
+      .fill(TASK_TITLE);
+    await page.getByRole("button", { name: "Next" }).click();
+
+    // Step 4: the summary screen should echo everything entered so far.
+    await expect(
+      page.locator("h3", { hasText: "Ready to launch" })
+    ).toBeVisible({ timeout: 10_000 });
+    await expect(page.getByText(COMPANY_NAME)).toBeVisible();
+    await expect(page.getByText(AGENT_NAME)).toBeVisible();
+    await expect(page.getByText(TASK_TITLE)).toBeVisible();
+
+    // Finish onboarding; the app should navigate to the created issue.
+    await page.getByRole("button", { name: "Create & Open Issue" }).click();
+    await expect(page).toHaveURL(/\/issues\//, { timeout: 10_000 });
+
+    // API verification uses page.request against the page's current origin.
+    const baseUrl = new URL(page.url()).origin;
+
+    // The company created in the wizard must be listed.
+    const companiesRes = await page.request.get(`${baseUrl}/api/companies`);
+    expect(companiesRes.ok()).toBe(true);
+    const companies = (await companiesRes.json()) as Array<{ id: string; name: string }>;
+    const company = companies.find((entry) => entry.name === COMPANY_NAME);
+    expect(company).toBeTruthy();
+
+    // The CEO agent must exist with the ceo role and the process adapter.
+    const agentsRes = await page.request.get(
+      `${baseUrl}/api/companies/${company!.id}/agents`
+    );
+    expect(agentsRes.ok()).toBe(true);
+    const agents = (await agentsRes.json()) as Array<{
+      id: string;
+      name: string;
+      role: string;
+      adapterType: string;
+    }>;
+    const ceoAgent = agents.find((entry) => entry.name === AGENT_NAME);
+    expect(ceoAgent).toBeTruthy();
+    expect(ceoAgent!.role).toBe("ceo");
+    expect(ceoAgent!.adapterType).toBe("process");
+
+    // The initial issue must exist and be assigned to the CEO agent.
+    const issuesRes = await page.request.get(
+      `${baseUrl}/api/companies/${company!.id}/issues`
+    );
+    expect(issuesRes.ok()).toBe(true);
+    const issues = (await issuesRes.json()) as Array<{
+      id: string;
+      title: string;
+      assigneeAgentId: string | null;
+    }>;
+    const issue = issues.find((entry) => entry.title === TASK_TITLE);
+    expect(issue).toBeTruthy();
+    expect(issue!.assigneeAgentId).toBe(ceoAgent!.id);
+
+    // Poll the heartbeat-runs endpoint for up to 30s until a run for the CEO
+    // appears. The callback returns null while none exists, which does not
+    // satisfy the matcher below and so keeps the poll going.
+    await expect.poll(
+      async () => {
+        const runsRes = await page.request.get(
+          `${baseUrl}/api/companies/${company!.id}/heartbeat-runs?agentId=${ceoAgent!.id}`
+        );
+        // NOTE(review): this assertion throws from inside the poll callback;
+        // confirm whether expect.poll retries or fails immediately when the
+        // callback throws (e.g. on a transient non-2xx response).
+        expect(runsRes.ok()).toBe(true);
+        const runs = (await runsRes.json()) as Array<{
+          agentId: string;
+          invocationSource: string;
+          status: string;
+        }>;
+        // Takes the first matching entry in the response.
+        // NOTE(review): the name implies newest-first ordering; confirm.
+        const latestRun = runs.find((entry) => entry.agentId === ceoAgent!.id);
+        return latestRun
+          ? {
+              invocationSource: latestRun.invocationSource,
+              status: latestRun.status,
+            }
+          : null;
+      },
+      {
+        timeout: 30_000,
+        intervals: [1_000, 2_000, 5_000],
+      }
+    ).toEqual(
+      // The run must come from the issue assignment; any of queued, running,
+      // or succeeded is accepted because the run may progress quickly.
+      expect.objectContaining({
+        invocationSource: "assignment",
+        status: expect.stringMatching(/^(queued|running|succeeded)$/),
+      })
+    );
+  });
+});
--- a/tests/release-smoke/playwright.config.ts
+++ b/tests/release-smoke/playwright.config.ts
@@ -0,0 +1,28 @@
+import { defineConfig } from "@playwright/test";
+
+// Address of the externally started smoke instance. No `webServer` entry is
+// configured below, so the suite relies on the server already being up.
+const DEFAULT_BASE_URL = "http://127.0.0.1:3232";
+const BASE_URL = process.env.PAPERCLIP_RELEASE_SMOKE_BASE_URL ?? DEFAULT_BASE_URL;
+
+// One retry when the CI environment variable is set, none otherwise.
+const retryCount = process.env.CI ? 1 : 0;
+
+export default defineConfig({
+  // Discover every *.spec.ts file beneath this directory.
+  testDir: ".",
+  testMatch: "**/*.spec.ts",
+
+  // Per-test budget of 90s; individual expect() calls default to 15s.
+  timeout: 90_000,
+  expect: {
+    timeout: 15_000,
+  },
+  retries: retryCount,
+
+  use: {
+    baseURL: BASE_URL,
+    headless: true,
+    // Keep screenshots and traces only for failing tests.
+    screenshot: "only-on-failure",
+    trace: "retain-on-failure",
+  },
+
+  projects: [
+    {
+      name: "chromium",
+      use: { browserName: "chromium" },
+    },
+  ],
+
+  // Raw results and the HTML report are written next to this config file.
+  outputDir: "./test-results",
+  reporter: [
+    ["list"],
+    ["html", { open: "never", outputFolder: "./playwright-report" }],
+  ],
+});