From 86a31999bf23f9999555d65b79606f4a6a02b5e3 Mon Sep 17 00:00:00 2001 From: Maksym Sadovnychyy Date: Sun, 26 Apr 2026 11:52:42 +0200 Subject: [PATCH] (bugfix): coordination table provisioner fixes --- CHANGELOG.md | 12 +++++ .../CoordinationTableProvisioner.cs | 38 +++++++++++++++ .../Infrastructure/RunMigrationsService.cs | 47 ++++--------------- .../RuntimeLeaseServiceNpgsql.cs | 10 ++-- .../InitializationHostedService.cs | 8 +++- src/MaksIT.CertsUI/MaksIT.CertsUI.csproj | 2 +- src/helm/values.yaml | 4 +- 7 files changed, 73 insertions(+), 48 deletions(-) create mode 100644 src/MaksIT.CertsUI.Engine/Infrastructure/CoordinationTableProvisioner.cs diff --git a/CHANGELOG.md b/CHANGELOG.md index 28874cb..d1b6747 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.3.13] - 2026-04-26 + +### Fixed + +- **HA lease / `42P01`:** Added `CoordinationTableProvisioner` with explicit `public.*` DDL; `InitializationHostedService` calls it immediately before bootstrap lease acquire (idempotent, same as post-migrate repair). `RuntimeLeaseServiceNpgsql` now uses `public.app_runtime_leases` in SQL so a non-default `search_path` cannot miss the table. Post-migrate verification requires `public.app_runtime_leases` plus `users` or `"VersionInfo"`. + +### Upgrade notes (Kubernetes / Helm) + +- **Pin container tags to the app semver** (e.g. `3.3.13` for server, client, reverseproxy) via `global.image.tag` and/or `components.*.image.tag`. The chart resolves the effective tag with `global.image.tag` when set (see `src/helm/templates/_helpers.tpl`). +- **Do not rely on `latest` + `imagePullPolicy: IfNotPresent` alone** — nodes keep the first pulled digest, so you can run an old server binary while the OCI chart is already `3.3.13`. Use an explicit semver tag and/or `pullPolicy: Always` (or bump `global.rolloutNonce` / `global.rollme` per chart NOTES) when upgrading. +- **Push all three images** for the tag you pin (`certs-ui/server`, `certs-ui/client`, `certs-ui/reverseproxy`) so every deployment can pull successfully. + ## [3.3.12] - 2026-04-26 ### Fixed diff --git a/src/MaksIT.CertsUI.Engine/Infrastructure/CoordinationTableProvisioner.cs b/src/MaksIT.CertsUI.Engine/Infrastructure/CoordinationTableProvisioner.cs new file mode 100644 index 0000000..4cccf64 --- /dev/null +++ b/src/MaksIT.CertsUI.Engine/Infrastructure/CoordinationTableProvisioner.cs @@ -0,0 +1,38 @@ +using Npgsql; + +namespace MaksIT.CertsUI.Engine.Infrastructure; + +/// +/// Idempotent DDL for HA coordination tables in schema public (same shape as the AcmeChallengesAndRuntimeLeases migration). Used after FluentMigrator and again before bootstrap lease +/// so never runs against a missing app_runtime_leases. +/// +public static class CoordinationTableProvisioner { + + /// Creates public.acme_http_challenges and public.app_runtime_leases if missing. + public static async Task EnsureAsync(string? connectionString, CancellationToken cancellationToken = default) { + if (string.IsNullOrWhiteSpace(connectionString)) + return; + + await using var conn = new NpgsqlConnection(connectionString); + await conn.OpenAsync(cancellationToken).ConfigureAwait(false); + + await using var cmd = new NpgsqlCommand( + """ + CREATE TABLE IF NOT EXISTS public.acme_http_challenges ( + file_name text NOT NULL PRIMARY KEY, + token_value text NOT NULL, + created_at_utc timestamp with time zone NOT NULL + ); + CREATE INDEX IF NOT EXISTS "IX_acme_http_challenges_created_at_utc" ON public.acme_http_challenges (created_at_utc); + CREATE TABLE IF NOT EXISTS public.app_runtime_leases ( + lease_name text NOT NULL PRIMARY KEY, + holder_id text NOT NULL, + version bigint NOT NULL DEFAULT 1, + acquired_at_utc timestamp with time zone NOT NULL, + expires_at_utc timestamp with time zone NOT NULL + ); + """, + conn); + await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } +} diff --git a/src/MaksIT.CertsUI.Engine/Infrastructure/RunMigrationsService.cs b/src/MaksIT.CertsUI.Engine/Infrastructure/RunMigrationsService.cs index d160e7b..cf32a40 100644 --- a/src/MaksIT.CertsUI.Engine/Infrastructure/RunMigrationsService.cs +++ b/src/MaksIT.CertsUI.Engine/Infrastructure/RunMigrationsService.cs @@ -38,7 +38,7 @@ public sealed class RunMigrationsService( await EnsureDatabaseExistsAsync(cancellationToken).ConfigureAwait(false); await BaselineExistingEfDatabaseAsync(cancellationToken).ConfigureAwait(false); await Task.Run(() => migrationRunner.MigrateUp(), cancellationToken).ConfigureAwait(false); - await EnsureCoordinationTablesAsync(cancellationToken).ConfigureAwait(false); + await CoordinationTableProvisioner.EnsureAsync(config.ConnectionString, cancellationToken).ConfigureAwait(false); await VerifyCoreSchemaAsync(cancellationToken).ConfigureAwait(false); logger.LogInformation("Certs database migrations completed."); } @@ -50,12 +50,12 @@ public sealed class RunMigrationsService( await using var cmd = new NpgsqlCommand( """ - SELECT EXISTS ( - SELECT 1 FROM information_schema.tables - WHERE table_schema = 'public' AND table_name = 'users') - OR EXISTS ( - SELECT 1 FROM information_schema.tables - WHERE table_schema = 'public' AND table_name = 'VersionInfo'); + SELECT + EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'app_runtime_leases') + AND ( + EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'users') + OR EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'VersionInfo') + ); """, conn); @@ -64,37 +64,8 @@ public sealed class RunMigrationsService( return; throw new InvalidOperationException( - "After FluentMigrator MigrateUp(), the target database still has no \"users\" or \"VersionInfo\" table in schema \"public\". " + - "Confirm the connection string Database= value, that the role can CREATE TABLE, and that FluentMigrator is not in preview/connectionless mode (non-empty connection string)."); - } - - /// - /// Idempotent DDL for HA tables from . - /// When VersionInfo already lists that migration but the tables are missing (restore drift, partial apply), - /// FluentMigrator will not re-run Up(); this repair keeps lease and HTTP-01 persistence working. - /// - private async Task EnsureCoordinationTablesAsync(CancellationToken cancellationToken) { - await using var conn = new NpgsqlConnection(config.ConnectionString); - await conn.OpenAsync(cancellationToken).ConfigureAwait(false); - - await using var cmd = new NpgsqlCommand( - """ - CREATE TABLE IF NOT EXISTS acme_http_challenges ( - file_name text NOT NULL PRIMARY KEY, - token_value text NOT NULL, - created_at_utc timestamp with time zone NOT NULL - ); - CREATE INDEX IF NOT EXISTS "IX_acme_http_challenges_created_at_utc" ON acme_http_challenges (created_at_utc); - CREATE TABLE IF NOT EXISTS app_runtime_leases ( - lease_name text NOT NULL PRIMARY KEY, - holder_id text NOT NULL, - version bigint NOT NULL DEFAULT 1, - acquired_at_utc timestamp with time zone NOT NULL, - expires_at_utc timestamp with time zone NOT NULL - ); - """, - conn); - await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + "After migrations and coordination DDL, schema \"public\" is missing \"app_runtime_leases\" and/or core tables (\"users\" / \"VersionInfo\"). " + + "Confirm Database= in the connection string, role CREATE privileges, and that FluentMigrator committed (non-empty connection string)."); } private async Task EnsureDatabaseExistsAsync(CancellationToken cancellationToken) { diff --git a/src/MaksIT.CertsUI.Engine/Infrastructure/RuntimeLeaseServiceNpgsql.cs b/src/MaksIT.CertsUI.Engine/Infrastructure/RuntimeLeaseServiceNpgsql.cs index f9a11a3..c81d0b9 100644 --- a/src/MaksIT.CertsUI.Engine/Infrastructure/RuntimeLeaseServiceNpgsql.cs +++ b/src/MaksIT.CertsUI.Engine/Infrastructure/RuntimeLeaseServiceNpgsql.cs @@ -29,15 +29,15 @@ public sealed class RuntimeLeaseServiceNpgsql( await using var cmd = new NpgsqlCommand( """ - INSERT INTO app_runtime_leases (lease_name, holder_id, version, acquired_at_utc, expires_at_utc) + INSERT INTO public.app_runtime_leases (lease_name, holder_id, version, acquired_at_utc, expires_at_utc) VALUES (@name, @holder, 1, @acquired, @expires) ON CONFLICT (lease_name) DO UPDATE SET holder_id = EXCLUDED.holder_id, - version = app_runtime_leases.version + 1, + version = public.app_runtime_leases.version + 1, acquired_at_utc = EXCLUDED.acquired_at_utc, expires_at_utc = EXCLUDED.expires_at_utc - WHERE app_runtime_leases.expires_at_utc < EXCLUDED.acquired_at_utc - OR app_runtime_leases.holder_id = EXCLUDED.holder_id + WHERE public.app_runtime_leases.expires_at_utc < EXCLUDED.acquired_at_utc + OR public.app_runtime_leases.holder_id = EXCLUDED.holder_id RETURNING holder_id; """, conn); @@ -72,7 +72,7 @@ public sealed class RuntimeLeaseServiceNpgsql( await using var cmd = new NpgsqlCommand( """ - DELETE FROM app_runtime_leases + DELETE FROM public.app_runtime_leases WHERE lease_name = @name AND holder_id = @holder; """, conn); diff --git a/src/MaksIT.CertsUI/HostedServices/InitializationHostedService.cs b/src/MaksIT.CertsUI/HostedServices/InitializationHostedService.cs index 0516686..080c9c7 100644 --- a/src/MaksIT.CertsUI/HostedServices/InitializationHostedService.cs +++ b/src/MaksIT.CertsUI/HostedServices/InitializationHostedService.cs @@ -1,4 +1,5 @@ using Microsoft.Extensions.Options; +using MaksIT.CertsUI.Engine; using MaksIT.CertsUI.Engine.DomainServices; using MaksIT.CertsUI.Engine.Infrastructure; using MaksIT.CertsUI.Engine.RuntimeCoordination; @@ -7,8 +8,8 @@ namespace MaksIT.CertsUI.HostedServices; /// /// Runs identity bootstrap before the API starts serving requests. FluentMigrator already ran in Program.cs -/// before the host starts. The bootstrap lease ensures only one replica writes against shared -/// . +/// before the host starts; coordination tables in public are ensured again here before the bootstrap lease. +/// The bootstrap lease ensures only one replica writes against shared . /// public sealed class InitializationHostedService( ILogger logger, @@ -27,6 +28,9 @@ public sealed class InitializationHostedService( try { logger.LogInformation("Running startup initialization..."); + var engineConfig = serviceProvider.GetRequiredService(); + await CoordinationTableProvisioner.EnsureAsync(engineConfig.ConnectionString, cancellationToken).ConfigureAwait(false); + var holder = runtimeInstance.InstanceId; var acquired = await runtimeLease.TryAcquireAsync(RuntimeLeaseNames.Bootstrap, holder, BootstrapLeaseTtl, cancellationToken).ConfigureAwait(false); if (!acquired.IsSuccess) diff --git a/src/MaksIT.CertsUI/MaksIT.CertsUI.csproj b/src/MaksIT.CertsUI/MaksIT.CertsUI.csproj index c5658e2..50def97 100644 --- a/src/MaksIT.CertsUI/MaksIT.CertsUI.csproj +++ b/src/MaksIT.CertsUI/MaksIT.CertsUI.csproj @@ -1,7 +1,7 @@ - 3.3.12 + 3.3.13 net10.0 enable enable diff --git a/src/helm/values.yaml b/src/helm/values.yaml index a017e12..5860d58 100644 --- a/src/helm/values.yaml +++ b/src/helm/values.yaml @@ -1,8 +1,8 @@ global: imagePullSecrets: [] image: - # Uncomment to override every component (global wins when set). Otherwise use each components.*.image. - # No Chart.appVersion. + # When non-empty, overrides every components.*.image.tag (see _helpers.tpl). Production/staging: pin to released + # semver (e.g. 3.3.13) and use pullPolicy Always or bump tag each release — do not rely on :latest + IfNotPresent alone. # tag: "latest" # pullPolicy: IfNotPresent # Optional rollout tuning (see NOTES): pin a fixed pod annotation or add a nonce for frozen/git-rendered manifests.