From 057e3a494cb6ee28ef058b6fd8d1122853596d65 Mon Sep 17 00:00:00 2001
From: Volodymyr Kartavyi
Date: Wed, 11 Mar 2026 21:59:02 +0100
Subject: [PATCH] fix: ensure embedded PostgreSQL databases use UTF-8 encoding

On macOS, `initdb` defaults to SQL_ASCII encoding because it infers
locale from the system environment. When `ensurePostgresDatabase()`
creates a database without specifying encoding, the new database
inherits SQL_ASCII from the cluster. This causes string functions like
`left()` to operate on bytes instead of characters, producing invalid
UTF-8 when multi-byte characters are truncated.

Two-part fix:

1. Pass `--encoding=UTF8 --locale=C` via `initdbFlags` to all
   EmbeddedPostgres constructors so the cluster defaults to UTF-8.
2. Explicitly set `encoding 'UTF8'` in the CREATE DATABASE statement
   with `template template0` (required because template1 may already
   have a different encoding) and `C` locale for portability.

Existing databases created with SQL_ASCII are NOT automatically fixed;
users must delete their local `data/db` directory and restart to
re-initialize the cluster.

Relates to #636 Co-Authored-By: Claude Opus 4.6 --- cli/src/commands/worktree.ts | 1 + packages/db/src/client.ts | 2 +- packages/db/src/migration-runtime.ts | 1 + server/src/index.ts | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/src/commands/worktree.ts b/cli/src/commands/worktree.ts index 8781b008..d956f3d3 100644 --- a/cli/src/commands/worktree.ts +++ b/cli/src/commands/worktree.ts @@ -505,6 +505,7 @@ async function ensureEmbeddedPostgres(dataDir: string, preferredPort: number): P password: "paperclip", port, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: () => {}, onError: () => {}, }); diff --git a/packages/db/src/client.ts b/packages/db/src/client.ts index c4275dc4..83b4aa78 100644 --- a/packages/db/src/client.ts +++ b/packages/db/src/client.ts @@ -730,7 +730,7 @@ export async function ensurePostgresDatabase( `; if (existing.length > 0) return "exists"; - await sql.unsafe(`create database "${databaseName}"`); + await sql.unsafe(`create database "${databaseName}" encoding 'UTF8' lc_collate 'C' lc_ctype 'C' template template0`); return "created"; } finally { await sql.end(); diff --git a/packages/db/src/migration-runtime.ts b/packages/db/src/migration-runtime.ts index bc90b762..10b7b9b1 100644 --- a/packages/db/src/migration-runtime.ts +++ b/packages/db/src/migration-runtime.ts @@ -96,6 +96,7 @@ async function ensureEmbeddedPostgresConnection( password: "paperclip", port: preferredPort, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: () => {}, onError: () => {}, }); diff --git a/server/src/index.ts b/server/src/index.ts index c220df92..50c6a7b2 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -334,6 +334,7 @@ export async function startServer(): Promise { password: "paperclip", port, persistent: true, + initdbFlags: ["--encoding=UTF8", "--locale=C"], onLog: appendEmbeddedPostgresLog, onError: appendEmbeddedPostgresLog, });