diff --git a/lib/builds/metrics.go b/lib/builds/metrics.go index 0adee036..b99b0dd3 100644 --- a/lib/builds/metrics.go +++ b/lib/builds/metrics.go @@ -4,6 +4,7 @@ import ( "context" "time" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" ) @@ -22,6 +23,7 @@ func NewMetrics(meter metric.Meter) (*Metrics, error) { "hypeman_build_duration_seconds", metric.WithDescription("Duration of builds in seconds"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.BuildDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/egressproxy/metrics.go b/lib/egressproxy/metrics.go index e03ed5bd..3fba44c3 100644 --- a/lib/egressproxy/metrics.go +++ b/lib/egressproxy/metrics.go @@ -3,15 +3,11 @@ package egressproxy import ( "context" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" ) -var ( - controlPlaneDurationBuckets = []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 5} - upstreamDurationBuckets = []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60} -) - type metrics struct { registrations metric.Int64Counter ruleUpdates metric.Int64Counter @@ -51,7 +47,7 @@ func newMetrics(meter metric.Meter, svc *Service) (*metrics, error) { "hypeman_egress_proxy_control_plane_duration_seconds", metric.WithDescription("Duration of egress proxy control plane operations"), metric.WithUnit("s"), - metric.WithExplicitBucketBoundaries(controlPlaneDurationBuckets...), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -69,7 +65,7 @@ func newMetrics(meter metric.Meter, svc *Service) (*metrics, error) { "hypeman_egress_proxy_upstream_duration_seconds", metric.WithDescription("Duration of egress proxy upstream requests"), metric.WithUnit("s"), - metric.WithExplicitBucketBoundaries(upstreamDurationBuckets...), + 
metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/guest/metrics.go b/lib/guest/metrics.go index f85726b1..5b4952b5 100644 --- a/lib/guest/metrics.go +++ b/lib/guest/metrics.go @@ -4,6 +4,7 @@ import ( "context" "time" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" ) @@ -48,6 +49,7 @@ func NewMetrics(meter metric.Meter) (*Metrics, error) { "hypeman_exec_duration_seconds", metric.WithDescription("Exec command duration"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -83,6 +85,7 @@ func NewMetrics(meter metric.Meter) (*Metrics, error) { "hypeman_cp_duration_seconds", metric.WithDescription("Copy operation duration"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/guestmemory/metrics.go b/lib/guestmemory/metrics.go index 9bba181c..fd15b558 100644 --- a/lib/guestmemory/metrics.go +++ b/lib/guestmemory/metrics.go @@ -5,6 +5,7 @@ import ( "time" "github.com/kernel/hypeman/lib/hypervisor" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" ) @@ -53,6 +54,7 @@ func NewMetrics(meter metric.Meter) (*Metrics, error) { "hypeman_guestmemory_reconcile_duration_seconds", metric.WithDescription("Guest memory reconcile duration"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/images/metrics.go b/lib/images/metrics.go index 1e8d480a..cc466741 100644 --- a/lib/images/metrics.go +++ b/lib/images/metrics.go @@ -4,6 +4,7 @@ import ( "context" "time" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" 
"go.opentelemetry.io/otel/metric" ) @@ -20,6 +21,7 @@ func newMetrics(meter metric.Meter, m *manager) (*Metrics, error) { "hypeman_images_build_duration_seconds", metric.WithDescription("Time to build an image"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.BuildDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/instances/metrics.go b/lib/instances/metrics.go index 453e45ed..bee79d1a 100644 --- a/lib/instances/metrics.go +++ b/lib/instances/metrics.go @@ -7,6 +7,7 @@ import ( "github.com/kernel/hypeman/lib/hypervisor" mw "github.com/kernel/hypeman/lib/middleware" + hypotel "github.com/kernel/hypeman/lib/otel" snapshotstore "github.com/kernel/hypeman/lib/snapshot" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" @@ -86,6 +87,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_instances_create_duration_seconds", metric.WithDescription("Time to create an instance"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -95,6 +97,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_instances_restore_duration_seconds", metric.WithDescription("Time to restore an instance from standby"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -104,6 +107,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_instances_standby_duration_seconds", metric.WithDescription("Time to put an instance in standby"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -113,6 +117,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_instances_stop_duration_seconds", 
metric.WithDescription("Time to stop an instance"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -122,6 +127,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_instances_start_duration_seconds", metric.WithDescription("Time to start an instance"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -131,6 +137,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_instances_time_to_running_seconds", metric.WithDescription("Time from boot start until an instance reaches Running"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -156,6 +163,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_snapshot_compression_duration_seconds", metric.WithDescription("Time to asynchronously compress snapshot memory"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err @@ -198,6 +206,7 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M "hypeman_snapshot_restore_memory_prepare_duration_seconds", metric.WithDescription("Time to prepare snapshot memory before restore"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/middleware/otel.go b/lib/middleware/otel.go index 663d11c2..49eef1f2 100644 --- a/lib/middleware/otel.go +++ b/lib/middleware/otel.go @@ -10,6 +10,7 @@ import ( "github.com/go-chi/chi/v5" "github.com/kernel/hypeman/lib/logger" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" 
) @@ -36,6 +37,7 @@ func NewHTTPMetrics(meter metric.Meter) (*HTTPMetrics, error) { "hypeman_http_request_duration_seconds", metric.WithDescription("HTTP request duration in seconds"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/otel/buckets.go b/lib/otel/buckets.go new file mode 100644 index 00000000..c8670511 --- /dev/null +++ b/lib/otel/buckets.go @@ -0,0 +1,25 @@ +package otel + +var commonDurationHistogramBuckets = []float64{ + 0.001, 0.0025, 0.005, 0.010, 0.025, 0.050, 0.075, + 0.100, 0.150, 0.200, 0.300, 0.500, 0.750, + 1, 1.5, 2, 2.5, 3, 4, 5, 7.5, 10, + 20, 30, 45, 60, 90, 120, +} + +var buildDurationHistogramBuckets = []float64{ + 0.100, 0.250, 0.500, 1, 2.5, 5, 10, + 20, 30, 45, 60, 90, 120, 180, 300, 600, 900, 1800, +} + +// CommonDurationHistogramBuckets returns a fresh copy of the standard +// Hypeman duration histogram bucket boundaries, spanning 1ms to 120s. +func CommonDurationHistogramBuckets() []float64 { + return append([]float64(nil), commonDurationHistogramBuckets...) +} + +// BuildDurationHistogramBuckets returns a fresh copy of the slower-moving +// bucket boundaries for build-style operations, spanning 100ms to 30m. +func BuildDurationHistogramBuckets() []float64 { + return append([]float64(nil), buildDurationHistogramBuckets...) 
+} diff --git a/lib/vmm/metrics.go b/lib/vmm/metrics.go index 19b514cf..3dcd02d8 100644 --- a/lib/vmm/metrics.go +++ b/lib/vmm/metrics.go @@ -4,6 +4,7 @@ import ( "context" "time" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" ) @@ -34,6 +35,7 @@ func NewMetrics(meter metric.Meter) (*Metrics, error) { "hypeman_vmm_api_duration_seconds", metric.WithDescription("Cloud Hypervisor API call duration"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err diff --git a/lib/volumes/metrics.go b/lib/volumes/metrics.go index ad8db491..7f94a0a5 100644 --- a/lib/volumes/metrics.go +++ b/lib/volumes/metrics.go @@ -6,6 +6,7 @@ import ( "syscall" "time" + hypotel "github.com/kernel/hypeman/lib/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" ) @@ -21,6 +22,7 @@ func newVolumeMetrics(meter metric.Meter, m *manager) (*Metrics, error) { "hypeman_volumes_create_duration_seconds", metric.WithDescription("Time to create a volume"), metric.WithUnit("s"), + metric.WithExplicitBucketBoundaries(hypotel.CommonDurationHistogramBuckets()...), ) if err != nil { return nil, err