Coverage for node / src / stigmem_node / observability / tracing.py: 61%
73 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
1"""OpenTelemetry tracing setup — spec §23 (Phase 13).
3Activated when ``opentelemetry-sdk`` is installed and
4``STIGMEM_OTEL_ENABLED=true``. All calls are no-ops when the SDK is absent
5or the feature is disabled, so the node runs without the dependency.
7Usage from routes::
9 from .observability.tracing import start_span
11 with start_span("stigmem.assert_fact") as span:
12 span.set_attribute("stigmem.tenant", tenant_id)
13 # ... do work ...
14"""
16from __future__ import annotations
18import contextlib
19import importlib.util
20import logging
21from collections.abc import Generator
22from typing import Any
# Module-level logger used for exporter-availability and span-recording diagnostics.
logger = logging.getLogger("stigmem.tracing")

# Set to True by init_tracing() once the tracer provider has been installed.
_OTEL_ENABLED = False
# Tracer obtained by init_tracing(); remains None while tracing is not initialized.
_tracer: Any = None
def _is_module_available(module_name: str) -> bool:
    """Return True when *module_name* can be located on the import path.

    ``importlib.util.find_spec`` imports the parent package of a dotted name,
    so a missing parent raises ``ModuleNotFoundError`` and a present-but-broken
    parent can raise any other ``ImportError``. It also raises ``ValueError``
    when the name is already in ``sys.modules`` with ``__spec__`` set to None.
    This probe runs at import time of the tracing module, so every one of
    those outcomes is reported as "not available" instead of propagating.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        # ImportError covers ModuleNotFoundError; ValueError is the spec-less
        # sys.modules entry case. Neither should take the node down.
        return False
# Probed once at import time: tracing can only be switched on when the core
# OTel API and SDK modules are all discoverable. ``all`` over a lazy ``map``
# stops at the first module that is missing.
_OTEL_SDK_AVAILABLE = all(
    map(
        _is_module_available,
        (
            "opentelemetry.trace",
            "opentelemetry.sdk.resources",
            "opentelemetry.sdk.trace",
        ),
    )
)
def init_tracing(service_name: str, otlp_endpoint: str) -> None:
    """Initialize the OTel SDK. Called once at app startup when otel_enabled=True.

    Builds a ``TracerProvider`` whose resource carries ``service.name``,
    attaches an OTLP span exporter when *otlp_endpoint* is non-empty (the HTTP
    exporter is tried first, the gRPC exporter is the fallback), installs the
    provider globally, and stores the module tracer used by ``start_span``.

    Args:
        service_name: Value of the ``service.name`` resource attribute.
        otlp_endpoint: Collector URL; an empty string attaches no exporter.
            For the HTTP exporter ``/v1/traces`` is appended unless the URL
            already ends with it. The gRPC exporter receives the value as given.

    When the SDK modules are not installed a warning is logged and tracing
    stays disabled. A repeated call after a successful initialization is
    ignored.
    """
    global _OTEL_ENABLED, _tracer
    if not _OTEL_SDK_AVAILABLE:
        logging.getLogger("stigmem").warning(
            "STIGMEM_OTEL_ENABLED=true but opentelemetry-sdk is not installed; "
            "install stigmem-node[observability] to enable tracing."
        )
        return

    if _OTEL_ENABLED and _tracer is not None:
        # The global tracer provider can only be set once. Building another
        # provider here would just leak it together with its span processor.
        logger.debug("init_tracing called again; tracing is already initialized")
        return

    from opentelemetry import trace as _otel_trace
    from opentelemetry.sdk.resources import Resource
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor

    resource = Resource(attributes={"service.name": service_name})
    provider = TracerProvider(resource=resource)

    if otlp_endpoint:
        exporter_added = False
        try:
            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

            # Accept both a collector base URL and a full traces URL without
            # producing ".../v1/traces/v1/traces".
            base_url = otlp_endpoint.rstrip("/")
            if base_url.endswith("/v1/traces"):
                traces_url = base_url
            else:
                traces_url = f"{base_url}/v1/traces"
            exporter = OTLPSpanExporter(endpoint=traces_url)
            provider.add_span_processor(BatchSpanProcessor(exporter))
            exporter_added = True
        except ImportError as exc:
            logger.debug("HTTP OTLP exporter unavailable: %s", exc)

        if not exporter_added:
            try:
                from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
                    OTLPSpanExporter as GrpcOTLPSpanExporter,
                )

                grpc_exporter = GrpcOTLPSpanExporter(endpoint=otlp_endpoint)
                provider.add_span_processor(BatchSpanProcessor(grpc_exporter))
            except ImportError as exc:
                logger.debug(
                    "gRPC OTLP exporter unavailable; traces collected locally only: %s",
                    exc,
                )

    _otel_trace.set_tracer_provider(provider)
    _tracer = _otel_trace.get_tracer(
        "stigmem.node", schema_url="https://opentelemetry.io/schemas/1.21.0"
    )
    # Flip the flag last so start_span never sees "enabled" without a tracer.
    _OTEL_ENABLED = True
class _NoopSpan:
    """Minimal no-op span used when OTel is disabled.

    Mirrors the subset of the OTel ``Span`` call surface that instrumented
    code uses, accepting the same optional arguments, so a call that works
    against a live span does not raise when tracing is turned off. Every
    method discards its arguments; ``is_recording`` reports False so callers
    can skip building expensive attributes.
    """

    def set_attribute(self, key: str, value: Any) -> None:  # noqa: ARG002
        pass

    def set_attributes(self, attributes: dict[str, Any]) -> None:  # noqa: ARG002
        pass

    def record_exception(  # noqa: ARG002
        self,
        exc: BaseException,
        attributes: dict[str, Any] | None = None,
        timestamp: int | None = None,
        escaped: bool = False,
    ) -> None:
        pass

    def set_status(self, status: Any, description: str | None = None) -> None:  # noqa: ARG002
        pass

    def add_event(  # noqa: ARG002
        self,
        name: str,
        attributes: dict[str, Any] | None = None,
        timestamp: int | None = None,
    ) -> None:
        pass

    def is_recording(self) -> bool:
        return False
@contextlib.contextmanager
def start_span(name: str, **initial_attributes: Any) -> Generator[_NoopSpan | Any, None, None]:
    """Start a span as a context manager.

    Yields a live OTel ``Span`` (when enabled) or a no-op ``_NoopSpan``
    (when disabled). Always safe to call; never blocks.

    Keyword arguments are set as attributes on the live span before it is
    yielded; they are ignored when tracing is disabled. An ``Exception``
    raised inside the ``with`` body is recorded on the span, the span status
    is set to ERROR, and the exception is re-raised unchanged. A failure while
    recording is logged at debug level and never masks the original error.
    """
    if not _OTEL_ENABLED or _tracer is None:
        yield _NoopSpan()
        return

    # Error recording is done by hand below, so the tracer's own on-exception
    # hooks are switched off. Left at their defaults they would attach a second
    # exception event and overwrite the status as the error unwinds.
    with _tracer.start_as_current_span(
        name, record_exception=False, set_status_on_exception=False
    ) as span:
        for key, value in initial_attributes.items():
            span.set_attribute(key, value)
        try:
            yield span
        except Exception as exc:
            try:
                from opentelemetry.trace import StatusCode

                span.record_exception(exc)
                span.set_status(StatusCode.ERROR, str(exc))
            except Exception as inner_exc:  # noqa: BLE001  # nosec B110 — best-effort
                logger.debug("OTel span error recording failed: %s", inner_exc)
            raise
def is_enabled() -> bool:
    """Return True once ``init_tracing`` has completed and switched tracing on."""
    return _OTEL_ENABLED