fix: set service.instance.id so metrics carry a unique instance label
otelsetup / vulnerabilities (pull_request) Successful in 1m58s
otelsetup / test (pull_request) Successful in 2m35s
pre-commit / pre-commit (pull_request) Successful in 6m20s

When OTEL_RESOURCE_ATTRIBUTES is not already set, SetupOTelSDK now
includes service.instance.id (the hostname, i.e. the pod name under
Kubernetes) in the value it generates. The OTLP->Prometheus exporter maps
it to the `instance` label on both metrics and target_info, which (1)
stops multi-replica services from colliding on a single series and (2)
gives PromQL joins a unique (job, instance) key. If the hostname is
unavailable or empty, the service name is used instead, so the attribute
is always present, although it no longer distinguishes replicas in that
case.
This commit is contained in:
2026-05-29 08:28:19 +02:00
parent fff76c4acc
commit 717ac59faf
+11 -1
View File
@@ -22,7 +22,17 @@ import (
// SetupOTelSDK bootstraps the OpenTelemetry pipeline.
func SetupOTelSDK(ctx context.Context, enabled bool, serviceName, buildVersion, environment string) (func(context.Context) error, error) {
if os.Getenv("OTEL_RESOURCE_ATTRIBUTES") == "" {
if err := os.Setenv("OTEL_RESOURCE_ATTRIBUTES", fmt.Sprintf("service.name=%s,service.version=%s,service.environment=%s", serviceName, buildVersion, environment)); err != nil {
// service.instance.id makes every pod a distinct telemetry resource. The
// OTLP→Prometheus exporter maps it to the `instance` label on metrics and
// target_info, which keeps multi-replica services from colliding on a
// single series and gives joins a unique (job, instance) key. Hostname is
// the pod name under Kubernetes; fall back to the service name if it is
// unavailable so the attribute is always present.
instanceID, err := os.Hostname()
if err != nil || instanceID == "" {
instanceID = serviceName
}
if err := os.Setenv("OTEL_RESOURCE_ATTRIBUTES", fmt.Sprintf("service.name=%s,service.version=%s,service.environment=%s,service.instance.id=%s", serviceName, buildVersion, environment, instanceID)); err != nil {
return func(context.Context) error {
return nil
}, err