Coverage for node / src / stigmem_node / settings.py: 95%

134 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-25 01:49 +0000

1from datetime import datetime 

2from typing import Annotated 

3 

4from pydantic import field_validator, model_validator 

5from pydantic_settings import BaseSettings, NoDecode, SettingsConfigDict 

6 

7 

class Settings(BaseSettings):
    """Runtime configuration for a stigmem node.

    Fields are populated by pydantic-settings using the ``STIGMEM_`` env
    prefix and a UTF-8 ``.env`` file; unknown keys are ignored (see
    ``model_config``). Validators defined on this class reject an invalid
    ``at_rest_encryption`` mode, a malformed ``node_private_key`` and an mTLS
    configuration that lacks a CA bundle.
    """

    model_config = SettingsConfigDict(
        env_prefix="STIGMEM_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    db_path: str = "stigmem.db"
    host: str = "0.0.0.0"  # noqa: S104  # nosec B104 — overridable via STIGMEM_HOST
    port: int = 8765
    # Optional stable node identity for deterministic local/CI federation fixtures.
    # When unset, the node creates and persists a stigmem:node:<uuid> identity.
    node_id: str = ""
    node_url: str = "http://localhost:8765"
    log_level: str = "info"
    cors_allowed_origins: Annotated[list[str], NoDecode] = []
    cors_allowed_origin_regex: str | None = None
    cors_allow_credentials: bool = True
    cors_dev_localhost: bool = False

    # When True (default), every request must carry a valid Bearer token.
    # Set to False only for local development / single-operator installs.
    auth_required: bool = True
    # Static API-key lifecycle controls. 0 disables max-age enforcement.
    api_key_max_age_days: int = 90
    api_key_expiring_soon_days: int = 30
    legacy_sha256_accept_until: datetime | None = None

    @field_validator("cors_allowed_origins", mode="before")
    @classmethod
    def _parse_cors_allowed_origins(cls, v: object) -> list[str]:
        """Normalize the raw CORS origins value into a list of origin strings.

        ``None`` and the empty string yield an empty list. A string is split
        on commas. A list, tuple, set or frozenset has every item converted
        with ``str``. In both cases entries are stripped of surrounding
        whitespace and blank entries are dropped. Any other value is returned
        unchanged so that pydantic's own validation reports it.
        """
        if v is None or v == "":
            return []
        if isinstance(v, str):
            candidates = v.split(",")
        elif isinstance(v, (list, tuple, set, frozenset)):
            candidates = [str(item) for item in v]
        else:
            return v  # type: ignore[return-value]
        stripped = (candidate.strip() for candidate in candidates)
        return [origin for origin in stripped if origin]

    # Federation — Phase 3 (spec §6)
    federation_enabled: bool = False
    # Base64url Ed25519 keypair. If both are empty, auto-generated and stored in node_meta.
    federation_pubkey: str = ""
    federation_privkey: str = ""
    # Pull replication interval in seconds (spec §6.3); advisory pull_interval_s
    # from peer overrides this.
    federation_pull_interval_s: int = 30
    federation_push_enabled: bool = False
    # Explicit dev/test escape hatch for federation without mTLS. Production
    # federation should leave this false and configure STIGMEM_TLS_* instead.
    federation_insecure: bool = False
    # Additional acknowledgement for local Docker/dev networks whose service DNS
    # names are not loopback. Never set in production.
    local_dev_allow_insecure_non_loopback: bool = False
    # Nonce window: how long (seconds) a nonce is kept to detect replays
    # (spec §6.6, default 5 min).
    federation_nonce_window_s: int = 300
    # Clock-skew leeway for peer-token exp/iat/nbf claim checks.
    peer_token_leeway_s: int = 30
    # Maximum accepted remote HLC skew for federated fact ingest. Future skew is
    # strict by default because it can advance local logical time; past skew is a
    # wider archival bound and may be set to 0 for one-off historical backfills.
    federation_hlc_max_future_skew_s: int = 300
    federation_hlc_max_past_skew_s: int = 2_592_000
    # Allow team-scoped facts to cross federation boundaries
    # (must be explicitly enabled; audit-logged).
    federation_allow_team: bool = False

    # Decay sweeper (Phase 6, spec §decay)
    # 0 = disabled; positive = decay non-expiring facts older than N seconds
    # when sweep runs without explicit ttl_seconds
    decay_ttl_seconds: int = 0
    # 0.0 = disabled; positive = decay facts below this confidence when sweep
    # runs without explicit min_confidence
    decay_min_confidence: float = 0.0

    # Track C / C1: require Ed25519 attestation on all fact assertions.
    # When True, POST /v1/facts must include a valid attestation token.
    # Defaults to False for backward compatibility.
    attestation_required: bool = False

    # OIDC bridge (Track B / B3): human identity → scoped API keys.
    # Set oidc_enabled=true and configure the remaining fields to activate.
    oidc_enabled: bool = False
    # IdP issuer URL; discovery doc fetched from {issuer_url}/.well-known/openid-configuration
    oidc_issuer_url: str = ""
    # client_id expected in the id_token's "aud" claim
    oidc_audience: str = ""
    # lifetime of issued API keys in hours (default 8 h working-day session)
    oidc_token_ttl_hours: int = 8
    # Accepted OIDC id_token signing algorithms. Operators can narrow this list.
    oidc_id_token_algorithms: list[str] = ["RS256", "ES256", "PS256", "EdDSA"]
    # comma-separated list of allowed email domains; empty = allow any
    oidc_allowed_domains: str = ""

    # Async job threshold (spec §14.5 / §15.4): scopes with more facts than this
    # trigger the async 202 path. Override in tests to force async path at small scale.
    async_job_threshold: int = 100_000

    # Source attestation mode (legacy compatibility field).
    # Source-attestation runtime behavior is gated by the experimental
    # stigmem-plugin-source-attestation package. Default installs keep this off.
    source_attestation_mode: str = "off"

    # Rate limiting for hosted offering (per API key, sliding 1-hour window).
    # 0 = disabled.
    rate_limit_write_per_hour: int = 1000
    rate_limit_read_per_hour: int = 5000
    rate_limit_disabled_ack: bool = False

    # Storage backend (Phase 8 / 11).
    #   "sqlite" (default) — local SQLite file at db_path.
    #   "libsql"           — libSQL / Turso; uses db_path as the local replica
    #                        file; set libsql_url + libsql_auth_token for
    #                        embedded-replica sync with Turso.
    #   "postgres"         — PostgreSQL; set pg_dsn to a libpq connection string.
    storage_backend: str = "sqlite"
    # Turso database endpoint, e.g. "libsql://my-db.turso.io"
    libsql_url: str = ""
    # Turso auth token (from `turso db tokens create`)
    libsql_auth_token: str = ""
    # PostgreSQL connection string, e.g. "postgresql://user:pw@localhost/stigmem"
    pg_dsn: str = ""
    # DATABASE_URL alias (Heroku / PaaS convention); also read from bare DATABASE_URL env var.
    database_url: str = ""
    # PostgreSQL schema for all tables (default: "public"). Use a unique
    # per-test schema to achieve row-level isolation without separate databases.
    pg_schema: str = "public"
    # Connection pool bounds for the Postgres backend.
    postgres_pool_min: int = 2
    postgres_pool_max: int = 10

    # Encryption at rest (Phase 8).
    #   "off" (default) — no encryption; plaintext DB (dev-friendly default).
    #   "on"            — SQLCipher for SQLite backend; native encryption for libSQL.
    # When "on", exactly one of at_rest_key_passphrase_env / at_rest_key_kms_uri
    # must be set — the node refuses to start otherwise.
    at_rest_encryption: str = "off"
    # Name of the env var whose value is the passphrase (not the passphrase itself).
    # e.g. STIGMEM_AT_REST_KEY_PASSPHRASE_ENV=MY_DB_PASSPHRASE
    at_rest_key_passphrase_env: str = ""
    # KMS URI for raw 32-byte key material. "env://VAR" reads a hex-encoded key
    # from env var VAR. Future schemes: "aws-kms://...", "gcp-kms://...".
    at_rest_key_kms_uri: str = ""

    @field_validator("at_rest_encryption")
    @classmethod
    def _validate_encryption_mode(cls, v: str) -> str:
        """Accept only the literal modes ``"on"`` and ``"off"``.

        Raises:
            ValueError: if ``v`` is any other string.
        """
        if v in ("on", "off"):
            return v
        raise ValueError(f"at_rest_encryption must be 'on' or 'off'; got {v!r}")

    # Federation Trust — Phase 8 (spec §19)
    # trust_mode controls source-trust scoring and quarantine routing:
    #   "strict"  — trust is computed for all inbound facts; t < 0.2 → quarantine.
    #   "relaxed" — trust is computed but quarantine is not auto-triggered (default).
    #   "off"     — trust not computed; source_trust is null on all facts.
    trust_mode: str = "relaxed"

    # Sanitizer mode (§19.7) applied at recall time:
    #   "block"      — fact excluded, placeholder returned.
    #   "quarantine" — fact moved to quarantine garden.
    #   "warn"       — fact returned with sanitizer_warnings (default).
    #   "off"        — no check (implied by trust_mode=off).
    sanitizer_mode: str = "warn"

    # UUID of the node's designated quarantine garden.
    # Required in strict mode; facts below threshold are rejected with 403 if unset.
    quarantine_garden_id: str = ""

    # Source-trust score weights (§19.4.2). Must sum to 1.0; deviations are not
    # validated at startup — set incorrectly and t will be out of [0,1] range.
    trust_weight_identity: float = 0.35
    trust_weight_peer_history: float = 0.30
    trust_weight_scope_authority: float = 0.25
    trust_weight_attestation_mode: float = 0.10

    # Path to a newline-delimited file of extra sanitizer regex patterns (§19.7.2).
    sanitizer_extra_patterns_file: str = ""

    # Path to YAML file defining operator auto-trust rules (always_trust / never_trust).
    trust_rules_file: str = ""

    # Plugin signing gate (ADR-011 / PR 4-INF.3).
    # When true, installed entry-point plugins must pass signing verification
    # before registration. Set false only for local development; unsigned plugin
    # loading remains warning- and audit-visible.
    plugin_signing_required: bool = True
    # Required literal acknowledgement before unsigned-plugin loading is allowed.
    plugin_unsigned_ack: str = ""
    # Comma-separated Sigstore signing identities accepted for production plugin
    # registration when plugin_signing_required=true.
    plugin_trusted_publishers: str = ""
    # Comma-separated signing identities accepted through explicit operator
    # override. Overrides remain audit-visible and are not a substitute for the
    # trusted-publisher allowlist.
    plugin_trust_override_publishers: str = ""

    # Transparency log backend (§19.2.3):
    #   "local" — append-only JSONL file with hash chain (default, no external deps).
    #   "rekor" — Sigstore Rekor REST API.
    #   "off"   — no TL submission; inclusion proofs are never verified.
    tl_backend: str = "local"
    tl_local_path: str = "stigmem_tl.jsonl"
    tl_rekor_url: str = "https://rekor.sigstore.dev"
    fact_chain_checkpoint_interval: int = 1000
    fact_chain_checkpoint_max_age_s: int = 60
    fact_chain_checkpoint_retry_s: int = 60

    # Capability token signing — spec §19.3.2 (C-SEC-1).
    # Base64url-encoded raw 32-byte Ed25519 seed used to sign capability tokens and
    # revocation events. If empty, token signing is skipped and verify_token() will
    # reject all tokens (dev/test nodes that don't participate in trust federation).
    node_private_key: str = ""

    @field_validator("node_private_key")
    @classmethod
    def _validate_node_private_key(cls, v: str) -> str:
        """Check that a configured signing seed is base64url for 32 bytes.

        An empty value is accepted unchanged. Otherwise any missing ``=``
        padding is restored before decoding, and the original string (not the
        padded one) is returned.

        Raises:
            ValueError: if the value cannot be base64url-decoded, or if it
                decodes to anything other than exactly 32 bytes.
        """
        if not v:
            return v
        import base64

        # Restore the "=" padding that base64url values commonly omit.
        padded = v + "=" * (-len(v) % 4)
        try:
            raw = base64.urlsafe_b64decode(padded)
        except ValueError as exc:
            # binascii.Error (malformed input / bad padding) is a ValueError
            # subclass, and non-ASCII text raises ValueError directly, so this
            # covers every decode failure without a blanket ``except Exception``.
            raise ValueError(f"node_private_key is not valid base64url: {exc}") from exc
        if len(raw) != 32:
            raise ValueError(f"node_private_key must decode to exactly 32 bytes; got {len(raw)}")
        return v

    # -------------------------------------------------------------------------
    # Embeddings — Phase 9 (spec §20 / design memo §2)
    # -------------------------------------------------------------------------
    # Set embed_enabled=true to activate sqlite-vec integration.
    # When false (default), no extension is loaded and no embeddings are stored.
    embed_enabled: bool = False

    # "local"  — Ollama HTTP API (default); requires a running Ollama instance.
    # "openai" — OpenAI embeddings API; requires OPENAI_API_KEY (or the env var
    #            named by embed_openai_api_key_env).
    # "stub"   — deterministic test stub; no external dependencies.
    embed_model_provider: str = "local"

    # Model identifier passed to the provider.
    # Local default: "nomic-embed-text-v1.5" (768-dim, Apache-2.0, runs on laptop).
    # OpenAI default: "text-embedding-3-small" (1536-dim).
    embed_model_id: str = "nomic-embed-text-v1.5"

    # Output dimensionality. MUST match the model; changing this after the first
    # embedding requires running `stigmem embed reindex` (migration tool).
    embed_dimension: int = 768

    # Ollama base URL (local provider only).
    embed_ollama_url: str = "http://localhost:11434"

    # Name of the env var holding the OpenAI API key (openai provider only).
    embed_openai_api_key_env: str = "OPENAI_API_KEY"

    # Facts with confidence below this threshold have their vec_facts entry
    # deleted during the decay sweep (design memo §2 "Decay interaction").
    embed_tombstone_threshold: float = 0.1

    # Subscription primitive (Phase 9, spec §20)
    # How long (seconds) the replay window extends back from now (default 24 h).
    subscription_replay_s: int = 86400
    # How often (seconds) the background sweep retries pending/failed delivery.
    subscription_delivery_sweep_s: int = 30
    # Consecutive delivery failures before the circuit breaker opens on a subscription.
    subscription_circuit_threshold: int = 10
    # How long (seconds) an event may remain in 'delivering' state before the next
    # ``deliver_pending`` invocation reverts it to 'pending' for redelivery.
    # Guards against crashed workers stranding events. Must be larger than the
    # worst-case webhook timeout (10 s) by a comfortable margin.
    subscription_claim_timeout_s: int = 300

    # -------------------------------------------------------------------------
    # mTLS Federation Transport — Phase 12 (spec §22.1)
    # -------------------------------------------------------------------------
    # Path to the node's PEM-encoded X.509 certificate for mTLS federation.
    # When tls_cert_path + tls_key_path are both set, mTLS is activated:
    # the uvicorn server requires client certs and the pull client presents this
    # cert to peers. Opt-out is only permitted for localhost deployments
    # (set host to "localhost" / "127.0.0.1" / "::1" and leave paths empty).
    tls_cert_path: str = ""
    # Path to the node's PEM-encoded private key corresponding to tls_cert_path.
    tls_key_path: str = ""
    # Path to a PEM CA bundle used to verify peer certificates.
    # Required when tls_cert_path + tls_key_path are configured.
    tls_ca_bundle: str = ""

    @model_validator(mode="after")
    def _require_ca_bundle_for_mtls(self) -> "Settings":
        """Reject an mTLS configuration that has no CA bundle.

        The check applies only when both ``tls_cert_path`` and
        ``tls_key_path`` are non-empty; any other combination passes.

        Raises:
            ValueError: if cert and key paths are set but ``tls_ca_bundle``
                is empty.
        """
        if self.tls_cert_path and self.tls_key_path and not self.tls_ca_bundle:
            raise ValueError(
                "STIGMEM_TLS_CA_BUNDLE is required when mTLS is enabled "
                "(STIGMEM_TLS_CERT_PATH + STIGMEM_TLS_KEY_PATH are set). "
                "Without it, peer certs fall back to the system CA store instead "
                "of the closed federation trust bundle (spec §22.1.2.2)."
            )
        return self

    @property
    def mtls_enabled(self) -> bool:
        """True when mTLS cert + key are configured (non-localhost deployments)."""
        return bool(self.tls_cert_path and self.tls_key_path)

    # -------------------------------------------------------------------------
    # Observability — Phase 13 (spec §23)
    # -------------------------------------------------------------------------
    # Set otel_enabled=true to activate OpenTelemetry tracing.
    # Requires stigmem-node[observability] (opentelemetry-sdk + OTLP exporter).
    otel_enabled: bool = False

    # Service name reported in OTel resource attributes.
    otel_service_name: str = "stigmem-node"

    # OTLP collector base URL (HTTP protocol).
    # e.g. "http://localhost:4318" for a local OpenTelemetry Collector or Tempo.
    # Leave empty to disable OTLP export (spans collected locally only).
    otel_exporter_otlp_endpoint: str = ""

    # -------------------------------------------------------------------------
    # Time-travel / as_of — Phase 13 (spec §24.2.2)
    # -------------------------------------------------------------------------
    # Minimum allowed as_of timestamp (ISO 8601 UTC). Queries before this floor
    # return 400 as_of_before_retention_floor. Empty string = no floor enforced.
    as_of_retention_floor: str = ""

337 

338 

# Module-level Settings instance, constructed once when this module is imported.
339settings = Settings()