Coverage for node / src / stigmem_node / recall / fuzzy_resolver.py: 100%
19 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
"""Fuzzy entity resolver — spec §2.6 Phase 6.

3-layer resolver:
  Layer 1: strict normalizer (entity_normalizer.py) — deterministic case + whitespace.
  Layer 2: explicit alias table lookup — user-defined semantic equivalences stored in
           entity_aliases (e.g. user:alice ≡ user:a.smith).
  Layer 3: passthrough — returns the Layer 1 result when no alias is registered.

The strict normalizer is stateless and import-time only; this module adds the
DB-backed Layer 2 on top and exposes helpers for alias registration / lookup.

Ingest contract: callers MUST apply normalize_entity_uri first, then call
resolve_entity with the already-normalized URI and an open connection. This
keeps the two concerns separable and avoids a second DB round-trip on the hot
normalization path when no alias table exists.
"""
18from __future__ import annotations
20import sqlite3
21from datetime import UTC, datetime
22from typing import Any
24from ..utility.entity_normalizer import NormalizationError, normalize_entity_uri
def resolve_entity(conn: sqlite3.Connection, normalized_uri: str) -> str:
    """Layer 2 alias lookup. Input MUST already be Layer 1–normalized.

    Performs a single lookup in ``entity_aliases`` keyed on ``raw_uri``; alias
    chains are not followed.

    Args:
        conn: Open SQLite connection. Works with the default tuple
            ``row_factory`` as well as name-addressable rows such as
            ``sqlite3.Row``.
        normalized_uri: Entity URI that has already been normalized.

    Returns:
        The ``canonical_uri`` of the matching alias row (coerced to ``str``),
        or ``normalized_uri`` unchanged when no alias is registered
        (Layer 3 passthrough).
    """
    row = conn.execute(
        "SELECT canonical_uri FROM entity_aliases WHERE raw_uri = ?",
        (normalized_uri,),
    ).fetchone()
    if row is None:
        return normalized_uri

    try:
        canonical = row["canonical_uri"]
    except TypeError:
        # Plain tuples (sqlite3's default row_factory) cannot be indexed by
        # column name; the SELECT has exactly one column, so position 0 is it.
        canonical = row[0]
    return str(canonical)
def register_alias(
    conn: sqlite3.Connection,
    raw_uri: str,
    canonical_uri: str,
    *,
    kind: str = "user",
) -> dict[str, Any]:
    """Store (or overwrite) an alias mapping ``raw_uri`` to ``canonical_uri``.

    Both URIs are run through ``normalize_entity_uri`` before they are
    written, so callers may pass un-normalized values. The row is written
    with ``INSERT OR REPLACE``; this function does not call ``commit``
    itself.

    Args:
        conn: Open SQLite connection holding the ``entity_aliases`` table.
        raw_uri: The alias URI that should resolve to ``canonical_uri``.
        canonical_uri: The URI the alias resolves to.
        kind: Value stored in the ``kind`` column; defaults to ``"user"``.

    Returns:
        A plain dict with keys ``raw_uri``, ``canonical_uri``, ``kind`` and
        ``created_at`` holding exactly the values that were stored.

    Raises:
        ValueError: If normalization of either URI fails, or if both URIs
            are identical after normalization.
    """
    try:
        alias_uri, target_uri = [
            normalize_entity_uri(uri) for uri in (raw_uri, canonical_uri)
        ]
    except NormalizationError as exc:
        raise ValueError(str(exc)) from exc

    if alias_uri == target_uri:
        raise ValueError(f"raw_uri and canonical_uri must differ after normalization: {alias_uri!r}")

    # Key order mirrors the column list of the INSERT below, so the dict's
    # values can be bound positionally.
    record: dict[str, Any] = {
        "raw_uri": alias_uri,
        "canonical_uri": target_uri,
        "kind": kind,
        "created_at": datetime.now(UTC).isoformat(),
    }
    conn.execute(
        """INSERT OR REPLACE INTO entity_aliases (raw_uri, canonical_uri, kind, created_at)
           VALUES (?, ?, ?, ?)""",
        tuple(record.values()),
    )
    return record