Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitlab/generate-appsec.php
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@
- apt update && apt install -y openjdk-17-jre
- |
echo "Installing codecov CLI"
curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import
curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&options=mr&search=0x27034E7FDB850E0BBC2C62FF806BB28AED779869" | gpg --no-default-keyring --keyring trustedkeys.gpg --import
CODECOV_VERSION=0.6.1
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov.SHA256SUM
Expand Down Expand Up @@ -325,7 +325,7 @@
- apt update && apt install -y openjdk-17-jre
- |
echo "Installing codecov CLI"
curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import
curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&options=mr&search=0x27034E7FDB850E0BBC2C62FF806BB28AED779869" | gpg --no-default-keyring --keyring trustedkeys.gpg --import
CODECOV_VERSION=0.6.1
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/linux/codecov.SHA256SUM
Expand Down Expand Up @@ -415,7 +415,7 @@
CODECOV_TOKEN=$(vault kv get --format=json kv/k8s/gitlab-runner/dd-trace-php/codecov | jq -r .data.data.token)
CODECOV_VERSION=0.6.1
CODECOV_ARCH=linux
curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import
curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&options=mr&search=0x27034E7FDB850E0BBC2C62FF806BB28AED779869" | gpg --no-default-keyring --keyring trustedkeys.gpg --import
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/${CODECOV_ARCH}/codecov
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/${CODECOV_ARCH}/codecov.SHA256SUM
curl -Os https://uploader.codecov.io/v${CODECOV_VERSION}/${CODECOV_ARCH}/codecov.SHA256SUM.sig
Expand Down
79 changes: 59 additions & 20 deletions appsec/helper-rust/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ async fn do_request_loop_iter(
cancel_token: &CancellationToken,
) -> anyhow::Result<()> {
// wait for any number of config_syncs, followed by request_init
let mut req_ctx = match recv_command(framed, cancel_token).await? {
let req_init_args = match recv_command(framed, cancel_token).await? {
protocol::Command::RequestInit(req) => {
let service = client.get_service();
let config_snapshot = service.config_snapshot();
Expand All @@ -372,19 +372,7 @@ async fn do_request_loop_iter(
return Ok(());
}

let mut req_ctx = ReqContext::new(service, config_snapshot.clone());
let result = req_ctx
.run_waf(req.data, &protocol::RequestExecOptions::regular())
.map_err(FatalRequestError)?;

let resp = protocol::CommandResponse::RequestInit(protocol::RequestInitResp {
triggers: &result.triggers,
actions: &result.actions,
force_keep: req_ctx.should_force_keep(service, result.waf_keep),
settings: req_ctx.settings(),
});
send_command_resp(framed, resp).await?;
req_ctx
req
}

protocol::Command::ConfigSync(args) => {
Expand All @@ -409,6 +397,39 @@ async fn do_request_loop_iter(
}
};

let mut req_ctx = {
let service = client.get_service();
ReqContext::new(service, service.config_snapshot().clone())
};

let result = run_request(client, framed, cancel_token, &mut req_ctx, req_init_args).await;

// submit unconditionally: even if a fatal error coccurs
submit_context_telemetry_metrics(client, &mut req_ctx);
submit_service_telemetry(client, client.get_service());

result
}

async fn run_request(
client: &mut Client,
framed: &mut tokio_util::codec::Framed<UnixStream, protocol::CommandCodec>,
cancel_token: &CancellationToken,
req_ctx: &mut ReqContext,
req_init: Box<protocol::RequestInitArgs>,
) -> anyhow::Result<()> {
let result = req_ctx
.run_waf(req_init.data, &protocol::RequestExecOptions::regular())
.map_err(FatalRequestError)?;

let resp = protocol::CommandResponse::RequestInit(protocol::RequestInitResp {
triggers: &result.triggers,
actions: &result.actions,
force_keep: req_ctx.should_force_keep(client.get_service(), result.waf_keep),
settings: req_ctx.settings(),
});
send_command_resp(framed, resp).await?;

loop {
match recv_command(framed, cancel_token).await? {
protocol::Command::RequestExec(req) => {
Expand Down Expand Up @@ -474,8 +495,7 @@ async fn do_request_loop_iter(
req_ctx.generate_span_metrics(&mut span_submitter);
req_ctx.generate_meta(&mut span_submitter);

let service = client.get_service();
let force_keep = req_ctx.should_force_keep(service, result.waf_keep);
let force_keep = req_ctx.should_force_keep(client.get_service(), result.waf_keep);

let resp =
protocol::CommandResponse::RequestShutdown(protocol::RequestShutdownResp {
Expand All @@ -488,9 +508,6 @@ async fn do_request_loop_iter(
});
send_command_resp(framed, resp).await?;

submit_context_telemetry_metrics(client, &mut req_ctx);
submit_service_telemetry(client, service);

break;
}
command => {
Expand Down Expand Up @@ -584,6 +601,22 @@ impl ReqContext {
}
}

/// Records a WAF evaluation error against the appropriate metric: appsec.waf.error
/// for non-RASP runs, appsec.rasp.error for RASP runs (RFC-1012).
fn record_eval_error(&mut self, error_code: i32, options: &protocol::RequestExecOptions) {
match &options.run_type {
WafRunType::NonRasp => self.waf_metrics.record_non_rasp_error_eval(error_code),
WafRunType::RaspRule {
rule_type,
rule_variant,
} => self.waf_metrics.record_rasp_error_eval(
error_code,
rule_type,
rule_variant.as_deref().unwrap_or(""),
),
}
}

fn run_waf(
&mut self,
data: libddwaf::object::WafMap,
Expand All @@ -598,7 +631,13 @@ impl ReqContext {
let res = match maybe_res {
Ok(res) => res,
Err(err) => {
self.waf_metrics.record_non_rasp_error_eval();
let error_code = match err {
libddwaf::RunError::InternalError => -3i32,
libddwaf::RunError::InvalidObject => -2i32,
libddwaf::RunError::InvalidArgument => -1i32,
_ => -127i32,
};
self.record_eval_error(error_code, options);
anyhow::bail!("WAF evaluation error: {}", err);
}
};
Expand Down
106 changes: 65 additions & 41 deletions appsec/helper-rust/src/client/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ pub struct WafMetrics {
// Ruleset version (context for tag generation)
rules_version: Option<String>,

/// Whether a non-RASP evaluation hit an error
waf_hit_error: bool,
/// The error code of the last non-RASP evaluation that hit an error, if any
waf_error_code: Option<i32>,

/// The RASP evaluation that hit an error, if any. RASP errors are reported
/// separately from non-RASP ones (appsec.rasp.error vs appsec.waf.error)
rasp_error: Option<RaspError>,

/// Total WAF execution time in milliseconds (non-RASP calls only)
waf_duration: Duration,
Expand Down Expand Up @@ -74,6 +78,14 @@ pub struct WafMetrics {
rate_limited: bool,
}

#[derive(Debug)]
struct RaspError {
/// The numeric error returned by ddwaf_run, or -127 if from the bindings.
code: i32,
rule_type: String,
rule_variant: String,
}

#[derive(Default, Debug, Clone)]
pub struct RaspRuleMetrics {
/// Total number of RASP rule evaluations, whether they matched or not
Expand All @@ -95,7 +107,8 @@ impl WafMetrics {
pub fn new(rules_version: Option<String>) -> Self {
Self {
rules_version,
waf_hit_error: false,
waf_error_code: None,
rasp_error: None,
waf_duration: Duration::ZERO,
waf_hit_timeout: false,
rasp_duration: Duration::ZERO,
Expand All @@ -117,8 +130,22 @@ impl WafMetrics {
self.rate_limited = rate_limited;
}

pub fn record_non_rasp_error_eval(&mut self) {
self.waf_hit_error = true;
pub fn record_non_rasp_error_eval(&mut self, error_code: i32) {
self.waf_error_code = Some(error_code);
}

pub fn record_rasp_error_eval(&mut self, error_code: i32, rule_type: &str, rule_variant: &str) {
self.rasp_error = Some(RaspError {
code: error_code,
rule_type: rule_type.to_string(),
rule_variant: rule_variant.to_string(),
});
// questionable but we still count towards these metrics even with error
self.rasp_rule_evals += 1;
self.rasp_per_rule
.entry((rule_type.to_string(), rule_variant.to_string()))
.or_default()
.evals += 1;
}

pub fn record_non_rasp_eval(&mut self, run_output: &libddwaf::RunOutput) {
Expand Down Expand Up @@ -193,68 +220,63 @@ impl telemetry::TelemetryMetricsGenerator for WafMetrics {
&'_ self,
submitter: &mut dyn telemetry::TelemetryMetricSubmitter,
) {
let base_tags = {
let mut tags = telemetry::TelemetryTags::new();
tags.add("waf_version", crate::service::Service::waf_version());
tags.add(
"event_rules_version",
self.rules_version.as_deref().unwrap_or("unknown"),
);
tags
};

// waf.requests metrics
// RFC-1012: all boolean tags must be emitted regardless of value.
let mut tags = telemetry::TelemetryTags::new();
tags.add("waf_version", crate::service::Service::waf_version());
tags.add(
"event_rules_version",
self.rules_version.as_deref().unwrap_or("unknown"),
);
let mut tags = base_tags.clone();
tags.add("rule_triggered", bool_tag(self.had_triggers));
// block_failure is not tracked: the PHP layer is assumed to always succeed at blocking.
// Therefore request_blocked == "WAF requested a block" == "block succeeded".
// request_excluded is not tracked: libddwaf applies exclusion filters internally and
// does not expose whether a request was excluded in RunOutput.
tags.add("request_blocked", bool_tag(self.request_blocked));
tags.add("waf_error", bool_tag(self.waf_hit_error));
tags.add("waf_error", bool_tag(self.waf_error_code.is_some()));
Comment thread
cataphract marked this conversation as resolved.
tags.add("waf_timeout", bool_tag(self.waf_hit_timeout));
tags.add("input_truncated", bool_tag(self.input_truncated));
tags.add("rate_limited", bool_tag(self.rate_limited));
submitter.submit_metric(telemetry::WAF_REQUESTS, 1.0, tags);

// waf.error
if let Some(error_code) = self.waf_error_code {
let mut err_tags = base_tags.clone();
err_tags.add("waf_error", error_code.to_string());
submitter.submit_metric(telemetry::WAF_ERROR, 1.0, err_tags);
}

// waf.duration distribution: one observation per request, value in microseconds
if !self.waf_duration.is_zero() {
let mut dur_tags = telemetry::TelemetryTags::new();
dur_tags.add("waf_version", crate::service::Service::waf_version());
dur_tags.add(
"event_rules_version",
self.rules_version.as_deref().unwrap_or("unknown"),
);
submitter.submit_metric(
telemetry::WAF_DURATION_DIST,
self.waf_duration.as_micros() as f64,
dur_tags,
base_tags.clone(),
);
}

// rasp.duration distribution: cumulative internal libddwaf runtime per request, in microseconds
if !self.rasp_duration.is_zero() {
let mut dur_tags = telemetry::TelemetryTags::new();
dur_tags.add("waf_version", crate::service::Service::waf_version());
dur_tags.add(
"event_rules_version",
self.rules_version.as_deref().unwrap_or("unknown"),
);
submitter.submit_metric(
telemetry::RASP_DURATION_DIST,
self.rasp_duration.as_micros() as f64,
dur_tags,
base_tags.clone(),
);
}

// Rasp rule metrics
for ((rule_type, rule_variant), metrics) in &self.rasp_per_rule {
let mut tags = telemetry::TelemetryTags::new();
let mut tags = base_tags.clone();
tags.add("rule_type", rule_type);
if !rule_variant.is_empty() {
tags.add("rule_variant", rule_variant);
}
tags.add("waf_version", crate::service::Service::waf_version());
tags.add(
"event_rules_version",
self.rules_version.as_deref().unwrap_or("unknown"),
);

if metrics.evals > 0 {
submitter.submit_metric(
Expand Down Expand Up @@ -287,6 +309,17 @@ impl telemetry::TelemetryMetricsGenerator for WafMetrics {
// tests expect this to always be sent, even if 0
submitter.submit_metric(telemetry::RASP_TIMEOUT, metrics.timeouts as f64, tags);
}

// rasp.error
if let Some(ref err) = self.rasp_error {
let mut err_tags = base_tags.clone();
err_tags.add("rule_type", &err.rule_type);
if !err.rule_variant.is_empty() {
err_tags.add("rule_variant", &err.rule_variant);
}
err_tags.add("waf_error", err.code.to_string());
submitter.submit_metric(telemetry::RASP_ERROR, 1.0, err_tags);
}
}
}

Expand Down Expand Up @@ -320,12 +353,3 @@ fn bool_tag(value: bool) -> &'static str {
"false"
}
}

trait DurationExt {
fn duration_ms_f64(&self) -> f64;
}
impl DurationExt for Duration {
fn duration_ms_f64(&self) -> f64 {
self.as_secs() as f64 * 1_000.0 + self.subsec_nanos() as f64 / 1_000_000.0
}
}
10 changes: 10 additions & 0 deletions appsec/helper-rust/src/telemetry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,13 @@ pub const WAF_INIT: MetricName = MetricName("waf.init");
pub const WAF_UPDATES: MetricName = MetricName("waf.updates");
pub const WAF_REQUESTS: MetricName = MetricName("waf.requests");
pub const WAF_CONFIG_ERRORS: MetricName = MetricName("waf.config_errors");
pub const WAF_ERROR: MetricName = MetricName("waf.error");
pub const WAF_DURATION_DIST: MetricName = MetricName("waf.duration");
pub const RASP_DURATION_DIST: MetricName = MetricName("rasp.duration");
pub const RASP_RULE_EVAL: MetricName = MetricName("rasp.rule.eval");
pub const RASP_RULE_MATCH: MetricName = MetricName("rasp.rule.match");
pub const RASP_TIMEOUT: MetricName = MetricName("rasp.timeout");
pub const RASP_ERROR: MetricName = MetricName("rasp.error");
pub const HELPER_WORKER_COUNT: MetricName = MetricName("helper.service_worker_count");

#[derive(Debug, Clone, Copy)]
Expand All @@ -122,6 +124,10 @@ pub const KNOWN_METRICS: &[KnownMetric] = &[
name: WAF_CONFIG_ERRORS,
metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT,
},
KnownMetric {
name: WAF_ERROR,
metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT,
},
KnownMetric {
name: WAF_DURATION_DIST,
metric_type: ddog_MetricType_DDOG_METRIC_TYPE_DISTRIBUTION,
Expand All @@ -142,6 +148,10 @@ pub const KNOWN_METRICS: &[KnownMetric] = &[
name: RASP_RULE_EVAL,
metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT,
},
KnownMetric {
name: RASP_ERROR,
metric_type: ddog_MetricType_DDOG_METRIC_TYPE_COUNT,
},
KnownMetric {
name: HELPER_WORKER_COUNT,
metric_type: ddog_MetricType_DDOG_METRIC_TYPE_GAUGE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ class TelemetryTests {
TelemetryHelpers.Metric connSuccess

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there no test checking for this new tags?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tags are only issued if there is a bug. libddwaf doesn't return failures otherwise. Initially, I added new environment variables to simulate a failure and tested those, but environment variables and filtered on libdatadog, so it became too complicated.

TelemetryHelpers.Metric workerCount


TelemetryHelpers.waitForMetrics(CONTAINER, 30) { List<TelemetryHelpers.GenerateMetrics> messages ->
def allSeries = messages.collectMany { it.series }
wafInit = allSeries.find { it.name == 'waf.init' }
Expand Down Expand Up @@ -1384,4 +1383,5 @@ class TelemetryTests {
assert bundledDiagLog.message == "{\"missing key 'conditions'\":[\"bad-rule\"]}"
}
}

}
Loading