fix: set service.instance.id so metrics carry a unique instance label
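SetupOTelSDK now includes service.instance.id (the pod hostname) in the default OTEL_RESOURCE_ATTRIBUTES it sets when that variable is not already defined; an OTEL_RESOURCE_ATTRIBUTES value supplied by the environment is left untouched. The OTLP→Prometheus exporter maps it to the `instance` label on both metrics and target_info, which (1) stops multi-replica services from colliding on a single series and (2) gives PromQL joins a unique (job, instance) key. Falls back to the service name when the hostname is unavailable or empty, so the attribute is always present (note that in this fallback case the value is shared by all replicas of the service).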
This commit is contained in:
@@ -22,7 +22,17 @@ import (
|
||||
// SetupOTelSDK bootstraps the OpenTelemetry pipeline.
|
||||
func SetupOTelSDK(ctx context.Context, enabled bool, serviceName, buildVersion, environment string) (func(context.Context) error, error) {
|
||||
if os.Getenv("OTEL_RESOURCE_ATTRIBUTES") == "" {
|
||||
if err := os.Setenv("OTEL_RESOURCE_ATTRIBUTES", fmt.Sprintf("service.name=%s,service.version=%s,service.environment=%s", serviceName, buildVersion, environment)); err != nil {
|
||||
// service.instance.id makes every pod a distinct telemetry resource. The
|
||||
// OTLP→Prometheus exporter maps it to the `instance` label on metrics and
|
||||
// target_info, which keeps multi-replica services from colliding on a
|
||||
// single series and gives joins a unique (job, instance) key. Hostname is
|
||||
// the pod name under Kubernetes; fall back to the service name if it is
|
||||
// unavailable so the attribute is always present.
|
||||
instanceID, err := os.Hostname()
|
||||
if err != nil || instanceID == "" {
|
||||
instanceID = serviceName
|
||||
}
|
||||
if err := os.Setenv("OTEL_RESOURCE_ATTRIBUTES", fmt.Sprintf("service.name=%s,service.version=%s,service.environment=%s,service.instance.id=%s", serviceName, buildVersion, environment, instanceID)); err != nil {
|
||||
return func(context.Context) error {
|
||||
return nil
|
||||
}, err
|
||||
|
||||
Reference in New Issue
Block a user