Coverage for node / src / stigmem_node / utility / entity_normalizer.py: 93%
21 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
"""Entity URI normalization — spec §2.6 (v0.7 normative).

Strict normalizer: deterministic, idempotent, no alias resolution.
Full fuzzy resolver (Kompl-style) is Phase 6.
"""
from __future__ import annotations

import re
# Formal entity URI: stigmem://<authority>/<type>/<id>.
# Authority and type are non-empty runs without "/"; the id is everything
# after the third slash and may itself contain "/".
_FORMAL_URI_RE = re.compile(r"^stigmem://([^/]+)/([^/]+)/(.+)$")

# One or more consecutive whitespace characters; normalize_entity_uri
# replaces each such run with a single hyphen.
_WHITESPACE_RE = re.compile(r"\s+")
class NormalizationError(ValueError):
    """Raised when an entity URI cannot be normalized.

    Subclasses ValueError, so callers catching ValueError also catch this.
    """
def normalize_entity_uri(raw: str) -> str:
    """Return the canonical form of an entity URI string.

    For formal URIs (stigmem://authority/type/id): lowercases authority, type,
    and id; trims surrounding whitespace from each component; collapses each
    run of whitespace inside the id to a single hyphen. Whitespace inside the
    authority or type is left as-is.

    For informal URIs (type:id, type/id, bare id): lowercases the entire string
    and collapses whitespace to hyphens without changing the URI format.
    Does NOT convert informal to formal (that is a separate migration concern).

    Any input that does not match the formal pattern is handled as informal,
    including strings that start with ``stigmem://`` but lack a component
    (e.g. a missing id).

    Args:
        raw: The entity URI as supplied by the caller.

    Returns:
        The normalized URI string.

    Raises:
        NormalizationError: If ``raw`` is empty or whitespace-only, or if a
            formal URI has a component that is empty after trimming.
    """
    # Strip once; a falsy ``raw`` is treated the same as an empty string.
    stripped = raw.strip() if raw else ""
    if not stripped:
        raise NormalizationError("entity URI must not be empty")

    m = _FORMAL_URI_RE.match(stripped)
    if m is None:
        # Informal URI: lowercase and collapse whitespace
        return _WHITESPACE_RE.sub("-", stripped.lower())

    authority = m.group(1).strip().lower()
    type_slug = m.group(2).strip().lower()
    id_part = _WHITESPACE_RE.sub("-", m.group(3).strip().lower())

    # The regex guarantees each group is non-empty, but a group made only of
    # whitespace (e.g. "stigmem:// /type/id") becomes empty after strip().
    if not (authority and type_slug and id_part):
        raise NormalizationError(
            f"normalization produced empty component in formal URI: {raw!r}"
        )
    return f"stigmem://{authority}/{type_slug}/{id_part}"
def is_informal(uri: str) -> bool:
    """Return True if the URI does not use the formal stigmem:// scheme.

    This is a case-sensitive prefix test on the string exactly as given; no
    trimming or normalization is applied first.
    """
    return not uri.startswith("stigmem://")