Coverage for node / src / stigmem_node / recall / fuzzy_resolver.py: 100%

19 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-25 01:49 +0000

1"""Fuzzy entity resolver — spec §2.6 Phase 6. 

2 

33-layer resolver: 

4 Layer 1: strict normalizer (entity_normalizer.py) — deterministic case + whitespace. 

5 Layer 2: explicit alias table lookup — user-defined semantic equivalences stored in 

6 entity_aliases (e.g. user:alice ≡ user:a.smith). 

7 Layer 3: passthrough — returns the Layer 1 result when no alias is registered. 

8 

9The strict normalizer is stateless and import-time only; this module adds the 

10DB-backed Layer 2 on top and exposes helpers for alias registration / lookup. 

11 

12Ingest contract: callers MUST apply normalize_entity_uri first, then call 

13resolve_entity with the already-normalized URI and an open connection. This 

14keeps the two concerns separable and avoids a second DB round-trip on the hot 

15normalization path when no alias table exists. 

16""" 

17 

18from __future__ import annotations 

19 

20import sqlite3 

21from datetime import UTC, datetime 

22from typing import Any 

23 

24from ..utility.entity_normalizer import NormalizationError, normalize_entity_uri 

25 

26 

def resolve_entity(conn: sqlite3.Connection, normalized_uri: str) -> str:
    """Resolve a Layer 1-normalized URI through the alias table (Layer 2).

    Args:
        conn: Open SQLite connection to a database that contains the
            ``entity_aliases`` table.
        normalized_uri: Entity URI that has already been Layer 1-normalized.
            It is matched verbatim against ``entity_aliases.raw_uri``; no
            normalization is performed here.

    Returns:
        The ``canonical_uri`` of the matching alias row (as ``str``), or
        ``normalized_uri`` unchanged when no alias is registered (Layer 3
        passthrough).
    """
    row = conn.execute(
        "SELECT canonical_uri FROM entity_aliases WHERE raw_uri = ?",
        (normalized_uri,),
    ).fetchone()
    if row is None:
        return normalized_uri

    # A connection with the default row_factory yields plain tuples, which
    # cannot be indexed by column name. Read those by position and keep the
    # by-name access for sqlite3.Row / mapping-style row factories.
    canonical = row[0] if isinstance(row, tuple) else row["canonical_uri"]
    return str(canonical)

38 

39 

def register_alias(
    conn: sqlite3.Connection,
    raw_uri: str,
    canonical_uri: str,
    *,
    kind: str = "user",
) -> dict[str, Any]:
    """Store a semantic alias so that ``raw_uri`` resolves to ``canonical_uri``.

    Both URIs are run through the Layer 1 normalizer before storage, so
    callers may pass them un-normalized. The row is written with
    ``INSERT OR REPLACE`` and stamped with the current UTC time in ISO 8601
    form. This function does not commit; transaction handling is left to the
    caller.

    Args:
        conn: Open SQLite connection to a database that contains the
            ``entity_aliases`` table.
        raw_uri: The alias URI to register.
        canonical_uri: The URI the alias should resolve to.
        kind: Value stored in the ``kind`` column; defaults to ``"user"``.

    Returns:
        The stored alias record as a plain dict with the keys ``raw_uri``,
        ``canonical_uri``, ``kind`` and ``created_at``.

    Raises:
        ValueError: If the normalizer rejects either URI, or if both URIs
            normalize to the same value.
    """
    try:
        alias_uri = normalize_entity_uri(raw_uri)
        target_uri = normalize_entity_uri(canonical_uri)
    except NormalizationError as exc:
        # Surface normalizer failures as ValueError, keeping the original
        # exception as the cause.
        raise ValueError(str(exc)) from exc

    if alias_uri == target_uri:
        raise ValueError(
            f"raw_uri and canonical_uri must differ after normalization: {alias_uri!r}"
        )

    # Key order mirrors the INSERT column list, so the values can be bound
    # positionally below.
    record: dict[str, Any] = {
        "raw_uri": alias_uri,
        "canonical_uri": target_uri,
        "kind": kind,
        "created_at": datetime.now(UTC).isoformat(),
    }
    conn.execute(
        """INSERT OR REPLACE INTO entity_aliases (raw_uri, canonical_uri, kind, created_at)
           VALUES (?, ?, ?, ?)""",
        tuple(record.values()),
    )
    return record