diff --git a/.gitlab/generate-appsec.php b/.gitlab/generate-appsec.php index 06c65cc1166..81f4df374c3 100644 --- a/.gitlab/generate-appsec.php +++ b/.gitlab/generate-appsec.php @@ -250,7 +250,7 @@ - apt update && apt install -y openjdk-17-jre - | echo "Installing codecov CLI" - curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import + curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&options=mr&search=0x27034E7FDB850E0BBC2C62FF806BB28AED779869" | gpg --no-default-keyring --keyring trustedkeys.gpg --import CODECOV_VERSION=0.6.1 curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov.SHA256SUM @@ -325,7 +325,7 @@ - apt update && apt install -y openjdk-17-jre - | echo "Installing codecov CLI" - curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import + curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&options=mr&search=0x27034E7FDB850E0BBC2C62FF806BB28AED779869" | gpg --no-default-keyring --keyring trustedkeys.gpg --import CODECOV_VERSION=0.6.1 curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov.SHA256SUM @@ -415,7 +415,7 @@ CODECOV_TOKEN=$(vault kv get --format=json kv/k8s/gitlab-runner/dd-trace-php/codecov | jq -r .data.data.token) CODECOV_VERSION=0.6.1 CODECOV_ARCH=linux - curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import + curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&options=mr&search=0x27034E7FDB850E0BBC2C62FF806BB28AED779869" | gpg --no-default-keyring --keyring trustedkeys.gpg --import curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/${CODECOV_ARCH}/codecov curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/${CODECOV_ARCH}/codecov.SHA256SUM curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/${CODECOV_ARCH}/codecov.SHA256SUM.sig diff --git a/appsec/helper-rust/src/client.rs b/appsec/helper-rust/src/client.rs index f54592f95e8..e9cb408171c 100644 --- a/appsec/helper-rust/src/client.rs +++ b/appsec/helper-rust/src/client.rs @@ -357,7 +357,7 @@ async fn do_request_loop_iter( cancel_token: &CancellationToken, ) -> anyhow::Result<()> { // wait for any number of config_syncs, followed by request_init - let mut req_ctx = match recv_command(framed, cancel_token).await? { + let req_init_args = match recv_command(framed, cancel_token).await? { protocol::Command::RequestInit(req) => { let service = client.get_service(); let config_snapshot = service.config_snapshot(); @@ -372,19 +372,7 @@ async fn do_request_loop_iter( return Ok(()); } - let mut req_ctx = ReqContext::new(service, config_snapshot.clone()); - let result = req_ctx - .run_waf(req.data, &protocol::RequestExecOptions::regular()) - .map_err(FatalRequestError)?; - - let resp = protocol::CommandResponse::RequestInit(protocol::RequestInitResp { - triggers: &result.triggers, - actions: &result.actions, - force_keep: req_ctx.should_force_keep(service, result.waf_keep), - settings: req_ctx.settings(), - }); - send_command_resp(framed, resp).await?; - req_ctx + req } protocol::Command::ConfigSync(args) => { @@ -409,6 +397,39 @@ async fn do_request_loop_iter( } }; + let mut req_ctx = { + let service = client.get_service(); + ReqContext::new(service, service.config_snapshot().clone()) + }; + + let result = run_request(client, framed, cancel_token, &mut req_ctx, req_init_args).await; + + // submit unconditionally: even if a fatal error coccurs + submit_context_telemetry_metrics(client, &mut req_ctx); + submit_service_telemetry(client, client.get_service()); + + result +} + +async fn run_request( + client: &mut Client, + framed: &mut tokio_util::codec::Framed, + cancel_token: &CancellationToken, + req_ctx: &mut ReqContext, + req_init: Box, +) -> anyhow::Result<()> { + let result = req_ctx + .run_waf(req_init.data, &protocol::RequestExecOptions::regular()) + .map_err(FatalRequestError)?; + + let resp = protocol::CommandResponse::RequestInit(protocol::RequestInitResp { + triggers: &result.triggers, + actions: &result.actions, + force_keep: req_ctx.should_force_keep(client.get_service(), result.waf_keep), + settings: req_ctx.settings(), + }); + send_command_resp(framed, resp).await?; + loop { match recv_command(framed, cancel_token).await? { protocol::Command::RequestExec(req) => { @@ -474,8 +495,7 @@ async fn do_request_loop_iter( req_ctx.generate_span_metrics(&mut span_submitter); req_ctx.generate_meta(&mut span_submitter); - let service = client.get_service(); - let force_keep = req_ctx.should_force_keep(service, result.waf_keep); + let force_keep = req_ctx.should_force_keep(client.get_service(), result.waf_keep); let resp = protocol::CommandResponse::RequestShutdown(protocol::RequestShutdownResp { @@ -488,9 +508,6 @@ async fn do_request_loop_iter( }); send_command_resp(framed, resp).await?; - submit_context_telemetry_metrics(client, &mut req_ctx); - submit_service_telemetry(client, service); - break; } command => { @@ -584,6 +601,22 @@ impl ReqContext { } } + /// Records a WAF evaluation error against the appropriate metric: appsec.waf.error + /// for non-RASP runs, appsec.rasp.error for RASP runs (RFC-1012). + fn record_eval_error(&mut self, error_code: i32, options: &protocol::RequestExecOptions) { + match &options.run_type { + WafRunType::NonRasp => self.waf_metrics.record_non_rasp_error_eval(error_code), + WafRunType::RaspRule { + rule_type, + rule_variant, + } => self.waf_metrics.record_rasp_error_eval( + error_code, + rule_type, + rule_variant.as_deref().unwrap_or(""), + ), + } + } + fn run_waf( &mut self, data: libddwaf::object::WafMap, @@ -598,7 +631,13 @@ impl ReqContext { let res = match maybe_res { Ok(res) => res, Err(err) => { - self.waf_metrics.record_non_rasp_error_eval(); + let error_code = match err { + libddwaf::RunError::InternalError => -3i32, + libddwaf::RunError::InvalidObject => -2i32, + libddwaf::RunError::InvalidArgument => -1i32, + _ => -127i32, + }; + self.record_eval_error(error_code, options); anyhow::bail!("WAF evaluation error: {}", err); } }; diff --git a/appsec/helper-rust/src/client/metrics.rs b/appsec/helper-rust/src/client/metrics.rs index 9c3b7848dce..00861270927 100644 --- a/appsec/helper-rust/src/client/metrics.rs +++ b/appsec/helper-rust/src/client/metrics.rs @@ -35,8 +35,12 @@ pub struct WafMetrics { // Ruleset version (context for tag generation) rules_version: Option, - /// Whether a non-RASP evaluation hit an error - waf_hit_error: bool, + /// The error code of the last non-RASP evaluation that hit an error, if any + waf_error_code: Option, + + /// The RASP evaluation that hit an error, if any. RASP errors are reported + /// separately from non-RASP ones (appsec.rasp.error vs appsec.waf.error) + rasp_error: Option, /// Total WAF execution time in milliseconds (non-RASP calls only) waf_duration: Duration, @@ -74,6 +78,14 @@ pub struct WafMetrics { rate_limited: bool, } +#[derive(Debug)] +struct RaspError { + /// The numeric error returned by ddwaf_run, or -127 if from the bindings. + code: i32, + rule_type: String, + rule_variant: String, +} + #[derive(Default, Debug, Clone)] pub struct RaspRuleMetrics { /// Total number of RASP rule evaluations, whether they matched or not @@ -95,7 +107,8 @@ impl WafMetrics { pub fn new(rules_version: Option) -> Self { Self { rules_version, - waf_hit_error: false, + waf_error_code: None, + rasp_error: None, waf_duration: Duration::ZERO, waf_hit_timeout: false, rasp_duration: Duration::ZERO, @@ -117,8 +130,22 @@ impl WafMetrics { self.rate_limited = rate_limited; } - pub fn record_non_rasp_error_eval(&mut self) { - self.waf_hit_error = true; + pub fn record_non_rasp_error_eval(&mut self, error_code: i32) { + self.waf_error_code = Some(error_code); + } + + pub fn record_rasp_error_eval(&mut self, error_code: i32, rule_type: &str, rule_variant: &str) { + self.rasp_error = Some(RaspError { + code: error_code, + rule_type: rule_type.to_string(), + rule_variant: rule_variant.to_string(), + }); + // questionable but we still count towards these metrics even with error + self.rasp_rule_evals += 1; + self.rasp_per_rule + .entry((rule_type.to_string(), rule_variant.to_string())) + .or_default() + .evals += 1; } pub fn record_non_rasp_eval(&mut self, run_output: &libddwaf::RunOutput) { @@ -193,68 +220,63 @@ impl telemetry::TelemetryMetricsGenerator for WafMetrics { &'_ self, submitter: &mut dyn telemetry::TelemetryMetricSubmitter, ) { + let base_tags = { + let mut tags = telemetry::TelemetryTags::new(); + tags.add("waf_version", crate::service::Service::waf_version()); + tags.add( + "event_rules_version", + self.rules_version.as_deref().unwrap_or("unknown"), + ); + tags + }; + // waf.requests metrics // RFC-1012: all boolean tags must be emitted regardless of value. - let mut tags = telemetry::TelemetryTags::new(); - tags.add("waf_version", crate::service::Service::waf_version()); - tags.add( - "event_rules_version", - self.rules_version.as_deref().unwrap_or("unknown"), - ); + let mut tags = base_tags.clone(); tags.add("rule_triggered", bool_tag(self.had_triggers)); // block_failure is not tracked: the PHP layer is assumed to always succeed at blocking. // Therefore request_blocked == "WAF requested a block" == "block succeeded". // request_excluded is not tracked: libddwaf applies exclusion filters internally and // does not expose whether a request was excluded in RunOutput. tags.add("request_blocked", bool_tag(self.request_blocked)); - tags.add("waf_error", bool_tag(self.waf_hit_error)); + tags.add("waf_error", bool_tag(self.waf_error_code.is_some())); tags.add("waf_timeout", bool_tag(self.waf_hit_timeout)); tags.add("input_truncated", bool_tag(self.input_truncated)); tags.add("rate_limited", bool_tag(self.rate_limited)); submitter.submit_metric(telemetry::WAF_REQUESTS, 1.0, tags); + // waf.error + if let Some(error_code) = self.waf_error_code { + let mut err_tags = base_tags.clone(); + err_tags.add("waf_error", error_code.to_string()); + submitter.submit_metric(telemetry::WAF_ERROR, 1.0, err_tags); + } + // waf.duration distribution: one observation per request, value in microseconds if !self.waf_duration.is_zero() { - let mut dur_tags = telemetry::TelemetryTags::new(); - dur_tags.add("waf_version", crate::service::Service::waf_version()); - dur_tags.add( - "event_rules_version", - self.rules_version.as_deref().unwrap_or("unknown"), - ); submitter.submit_metric( telemetry::WAF_DURATION_DIST, self.waf_duration.as_micros() as f64, - dur_tags, + base_tags.clone(), ); } // rasp.duration distribution: cumulative internal libddwaf runtime per request, in microseconds if !self.rasp_duration.is_zero() { - let mut dur_tags = telemetry::TelemetryTags::new(); - dur_tags.add("waf_version", crate::service::Service::waf_version()); - dur_tags.add( - "event_rules_version", - self.rules_version.as_deref().unwrap_or("unknown"), - ); submitter.submit_metric( telemetry::RASP_DURATION_DIST, self.rasp_duration.as_micros() as f64, - dur_tags, + base_tags.clone(), ); } // Rasp rule metrics for ((rule_type, rule_variant), metrics) in &self.rasp_per_rule { - let mut tags = telemetry::TelemetryTags::new(); + let mut tags = base_tags.clone(); tags.add("rule_type", rule_type); if !rule_variant.is_empty() { tags.add("rule_variant", rule_variant); } - tags.add("waf_version", crate::service::Service::waf_version()); - tags.add( - "event_rules_version", - self.rules_version.as_deref().unwrap_or("unknown"), - ); if metrics.evals > 0 { submitter.submit_metric( @@ -287,6 +309,17 @@ impl telemetry::TelemetryMetricsGenerator for WafMetrics { // tests expect this to always be sent, even if 0 submitter.submit_metric(telemetry::RASP_TIMEOUT, metrics.timeouts as f64, tags); } + + // rasp.error + if let Some(ref err) = self.rasp_error { + let mut err_tags = base_tags.clone(); + err_tags.add("rule_type", &err.rule_type); + if !err.rule_variant.is_empty() { + err_tags.add("rule_variant", &err.rule_variant); + } + err_tags.add("waf_error", err.code.to_string()); + submitter.submit_metric(telemetry::RASP_ERROR, 1.0, err_tags); + } } } @@ -320,12 +353,3 @@ fn bool_tag(value: bool) -> &'static str { "false" } } - -trait DurationExt { - fn duration_ms_f64(&self) -> f64; -} -impl DurationExt for Duration { - fn duration_ms_f64(&self) -> f64 { - self.as_secs() as f64 * 1_000.0 + self.subsec_nanos() as f64 / 1_000_000.0 - } -} diff --git a/appsec/helper-rust/src/telemetry.rs b/appsec/helper-rust/src/telemetry.rs index 605804cc7b6..a722c52b8fd 100644 --- a/appsec/helper-rust/src/telemetry.rs +++ b/appsec/helper-rust/src/telemetry.rs @@ -93,11 +93,13 @@ pub const WAF_INIT: MetricName = MetricName("waf.init"); pub const WAF_UPDATES: MetricName = MetricName("waf.updates"); pub const WAF_REQUESTS: MetricName = MetricName("waf.requests"); pub const WAF_CONFIG_ERRORS: MetricName = MetricName("waf.config_errors"); +pub const WAF_ERROR: MetricName = MetricName("waf.error"); pub const WAF_DURATION_DIST: MetricName = MetricName("waf.duration"); pub const RASP_DURATION_DIST: MetricName = MetricName("rasp.duration"); pub const RASP_RULE_EVAL: MetricName = MetricName("rasp.rule.eval"); pub const RASP_RULE_MATCH: MetricName = MetricName("rasp.rule.match"); pub const RASP_TIMEOUT: MetricName = MetricName("rasp.timeout"); +pub const RASP_ERROR: MetricName = MetricName("rasp.error"); pub const HELPER_WORKER_COUNT: MetricName = MetricName("helper.service_worker_count"); #[derive(Debug, Clone, Copy)] @@ -122,6 +124,10 @@ pub const KNOWN_METRICS: &[KnownMetric] = &[ name: WAF_CONFIG_ERRORS, metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT, }, + KnownMetric { + name: WAF_ERROR, + metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT, + }, KnownMetric { name: WAF_DURATION_DIST, metric_type: ddog_MetricType_DDOG_METRIC_TYPE_DISTRIBUTION, @@ -142,6 +148,10 @@ pub const KNOWN_METRICS: &[KnownMetric] = &[ name: RASP_RULE_EVAL, metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT, }, + KnownMetric { + name: RASP_ERROR, + metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT, + }, KnownMetric { name: HELPER_WORKER_COUNT, metric_type: ddog_MetricType_DDOG_METRIC_TYPE_GAUGE, diff --git a/appsec/tests/integration/src/test/groovy/com/datadog/appsec/php/integration/TelemetryTests.groovy b/appsec/tests/integration/src/test/groovy/com/datadog/appsec/php/integration/TelemetryTests.groovy index b5c17618564..1d16ea3786f 100644 --- a/appsec/tests/integration/src/test/groovy/com/datadog/appsec/php/integration/TelemetryTests.groovy +++ b/appsec/tests/integration/src/test/groovy/com/datadog/appsec/php/integration/TelemetryTests.groovy @@ -118,7 +118,6 @@ class TelemetryTests { TelemetryHelpers.Metric connSuccess TelemetryHelpers.Metric workerCount - TelemetryHelpers.waitForMetrics(CONTAINER, 30) { List messages -> def allSeries = messages.collectMany { it.series } wafInit = allSeries.find { it.name == 'waf.init' } @@ -1384,4 +1383,5 @@ class TelemetryTests { assert bundledDiagLog.message == "{\"missing key 'conditions'\":[\"bad-rule\"]}" } } + }