Coverage for node / src / stigmem_node / utility / entity_normalizer.py: 93%

21 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-25 01:49 +0000

1"""Entity URI normalization — spec §2.6 (v0.7 normative). 

2 

3Strict normalizer: deterministic, idempotent, no alias resolution. 

4Full fuzzy resolver (Kompl-style) is Phase 6. 

5""" 

6 

7from __future__ import annotations 

8 

9import re 

10 

# `.` does not match "\n" by default. DOTALL lets the id component span line
# breaks, so a formal URI with a newline inside its id is still recognised as
# formal (the newline is then collapsed to a hyphen like any other whitespace).
_FORMAL_URI_RE = re.compile(r"^stigmem://([^/]+)/([^/]+)/(.+)$", re.DOTALL)
_WHITESPACE_RE = re.compile(r"\s+")


class NormalizationError(ValueError):
    """Raised when an entity URI cannot be normalized (e.g. empty input)."""


def normalize_entity_uri(raw: str) -> str:
    """Return the canonical form of an entity URI string.

    Surrounding whitespace is trimmed first, then one of two paths applies:

    * Formal URIs (``stigmem://authority/type/id``): authority, type and id
      are each trimmed and lowercased, and every whitespace run inside the
      id (line breaks included) becomes a single hyphen. Whitespace inside
      authority or type is left untouched.
    * Informal URIs (``type:id``, ``type/id``, bare id -- anything that does
      not match the formal pattern): the whole string is lowercased and
      whitespace runs become single hyphens. The URI format is not changed;
      informal URIs are NOT converted to formal ones (that is a separate
      migration concern).

    Args:
        raw: The entity URI as supplied by the caller.

    Returns:
        The normalized URI string.

    Raises:
        NormalizationError: If ``raw`` is empty or whitespace-only, or if a
            component of a formal URI is empty after trimming.
    """
    # Strip once; a falsy ``raw`` (e.g. "" or None) is treated as empty.
    stripped = raw.strip() if raw else ""
    if not stripped:
        raise NormalizationError("entity URI must not be empty")

    m = _FORMAL_URI_RE.match(stripped)
    if m is None:
        # Informal URI: lowercase and collapse whitespace
        return _WHITESPACE_RE.sub("-", stripped.lower())

    authority, type_slug, id_part = (part.strip().lower() for part in m.groups())
    id_part = _WHITESPACE_RE.sub("-", id_part)
    # The regex guarantees non-empty captures, but a capture made only of
    # whitespace (e.g. "stigmem:// /type/id") becomes empty after trimming.
    if not (authority and type_slug and id_part):
        raise NormalizationError(
            f"normalization produced empty component in formal URI: {raw!r}"
        )
    return f"stigmem://{authority}/{type_slug}/{id_part}"

48 

49 

def is_informal(uri: str) -> bool:
    """Return True if the URI does not use the formal stigmem:// scheme.

    Leading whitespace is ignored before checking the prefix, so a padded
    formal URI such as ``"  stigmem://a/b/c"`` is reported as formal (False).
    The check is case-sensitive and looks only at the scheme prefix; it does
    not validate the rest of the URI.

    Args:
        uri: The entity URI string to classify.

    Returns:
        False when ``uri`` (ignoring leading whitespace) starts with
        ``stigmem://``; True otherwise.
    """
    return not uri.lstrip().startswith("stigmem://")