Improve orphaned local heartbeat recovery

Persist child-process metadata for local adapter runs, keep detached runs alive when their pid still exists, queue a single automatic retry when the pid is confirmed dead, and clear detached warnings when the original run reports activity again.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
dotta
2026-03-19 11:20:36 -05:00
parent 7f3fad64b8
commit c844ca1a40
17 changed files with 10924 additions and 22 deletions

View File

@@ -1,4 +1,4 @@
import { pgTable, uuid, text, timestamp, jsonb, index, integer, bigint, boolean } from "drizzle-orm/pg-core";
import { type AnyPgColumn, pgTable, uuid, text, timestamp, jsonb, index, integer, bigint, boolean } from "drizzle-orm/pg-core";
import { companies } from "./companies.js";
import { agents } from "./agents.js";
import { agentWakeupRequests } from "./agent_wakeup_requests.js";
@@ -31,6 +31,12 @@ export const heartbeatRuns = pgTable(
stderrExcerpt: text("stderr_excerpt"),
errorCode: text("error_code"),
externalRunId: text("external_run_id"),
processPid: integer("process_pid"),
processStartedAt: timestamp("process_started_at", { withTimezone: true }),
retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, {
onDelete: "set null",
}),
processLossRetryCount: integer("process_loss_retry_count").notNull().default(0),
contextSnapshot: jsonb("context_snapshot").$type<Record<string, unknown>>(),
createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),