Commit 063da46

feat(ai): AI cost calculation (#4840)
Fixes problems with the old way of calculating costs, where only input and output tokens were included in the calculation, and refactors `calculate_ai_model_cost` to be more robust, so that including a new field in the cost calculation no longer requires changing multiple functions. Closes [TET-648: Refactor cost calculation](https://linear.app/getsentry/issue/TET-648/refactor-cost-calculation)
1 parent 073833c commit 063da46
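
In effect, the V2 model makes the total cost a single weighted sum over token classes, so supporting another token type means adding one field to `ModelCostV2` and one term to the sum. A minimal sketch of the formula (the struct mirrors `ModelCostV2` from the diff below; the standalone helper is illustrative, not the actual `calculate_ai_model_cost` signature):

```rust
/// Per-token prices in US dollars, mirroring `ModelCostV2` from this commit.
struct ModelCostV2 {
    input_per_token: f64,
    output_per_token: f64,
    output_reasoning_per_token: f64,
    input_cached_per_token: f64,
}

/// Illustrative helper: every token class contributes independently;
/// counts missing from the span default to 0.0.
fn total_cost(c: &ModelCostV2, input: f64, output: f64, reasoning: f64, cached: f64) -> f64 {
    c.input_per_token * input
        + c.output_per_token * output
        + c.output_reasoning_per_token * reasoning
        + c.input_cached_per_token * cached
}
```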

File tree: 6 files changed (+114, -103 lines changed)


CHANGELOG.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -9,6 +9,7 @@
 **Internal**:
 
 - Produce spans to the items topic. ([#4735](https://github.com/getsentry/relay/pull/4735))
+- Take into account more types of tokens when doing AI cost calculation. ([#4840](https://github.com/getsentry/relay/pull/4840))
 
 ## 25.6.1
```

relay-event-normalization/src/event.rs

Lines changed: 60 additions & 51 deletions
```diff
@@ -1502,13 +1502,12 @@ mod tests {
     use insta::assert_debug_snapshot;
     use itertools::Itertools;
-    use relay_common::glob2::LazyGlob;
     use relay_event_schema::protocol::{Breadcrumb, Csp, DebugMeta, DeviceContext, Values};
     use relay_protocol::{SerializableAnnotated, get_value};
     use serde_json::json;
 
     use super::*;
-    use crate::{ClientHints, MeasurementsConfig, ModelCost};
+    use crate::{ClientHints, MeasurementsConfig, ModelCostV2};
 
     const IOS_MOBILE_EVENT: &str = r#"
     {
```

```diff
@@ -2204,8 +2203,11 @@
             "parent_span_id": "a1e13f3f06239d69",
             "trace_id": "922dda2462ea4ac2b6a4b339bee90863",
             "measurements": {
-                "ai_total_tokens_used": {
-                    "value": 1230
+                "ai_prompt_tokens_used": {
+                    "value": 1000
+                },
+                "ai_completion_tokens_used": {
+                    "value": 2000
                 }
             },
             "data": {
```

```diff
@@ -2244,25 +2246,28 @@
         &mut event,
         &NormalizationConfig {
             ai_model_costs: Some(&ModelCosts {
-                version: 1,
-                costs: vec![
-                    ModelCost {
-                        model_id: LazyGlob::new("claude-2*"),
-                        for_completion: false,
-                        cost_per_1k_tokens: 1.0,
-                    },
-                    ModelCost {
-                        model_id: LazyGlob::new("gpt4-21*"),
-                        for_completion: false,
-                        cost_per_1k_tokens: 2.0,
-                    },
-                    ModelCost {
-                        model_id: LazyGlob::new("gpt4-21*"),
-                        for_completion: true,
-                        cost_per_1k_tokens: 20.0,
-                    },
-                ],
-                models: HashMap::new(),
+                version: 2,
+                costs: vec![],
+                models: HashMap::from([
+                    (
+                        "claude-2.1".to_owned(),
+                        ModelCostV2 {
+                            input_per_token: 0.01,
+                            output_per_token: 0.02,
+                            output_reasoning_per_token: 0.03,
+                            input_cached_per_token: 0.0,
+                        },
+                    ),
+                    (
+                        "gpt4-21-04".to_owned(),
+                        ModelCostV2 {
+                            input_per_token: 0.02,
+                            output_per_token: 0.03,
+                            output_reasoning_per_token: 0.04,
+                            input_cached_per_token: 0.0,
+                        },
+                    ),
+                ]),
             }),
             ..NormalizationConfig::default()
         },
```

```diff
@@ -2276,15 +2281,15 @@
                 .and_then(|span| span.value())
                 .and_then(|span| span.data.value())
                 .and_then(|data| data.gen_ai_usage_total_cost.value()),
-            Some(&Value::F64(1.23))
+            Some(&Value::F64(50.0))
         );
         assert_eq!(
             spans
                 .get(1)
                 .and_then(|span| span.value())
                 .and_then(|span| span.data.value())
                 .and_then(|data| data.gen_ai_usage_total_cost.value()),
-            Some(&Value::F64(20.0 * 2.0 + 2.0))
+            Some(&Value::F64(80.0))
         );
     }
 
```
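The expected values follow directly from the V2 prices configured above: both spans carry 1000 prompt and 2000 completion tokens, so the claude-2.1 span costs 1000 × 0.01 + 2000 × 0.02 = 50.0 and the gpt4-21-04 span costs 1000 × 0.02 + 2000 × 0.03 = 80.0.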

```diff
@@ -2302,9 +2307,11 @@
             "parent_span_id": "a1e13f3f06239d69",
             "trace_id": "922dda2462ea4ac2b6a4b339bee90863",
             "data": {
-                "gen_ai.usage.total_tokens": 1230,
-                "ai.pipeline.name": "Autofix Pipeline",
-                "ai.model_id": "claude-2.1"
+                "gen_ai.usage.input_tokens": 1000,
+                "gen_ai.usage.output_tokens": 2000,
+                "gen_ai.usage.output_tokens.reasoning": 3000,
+                "gen_ai.usage.input_tokens.cached": 4000,
+                "gen_ai.request.model": "claude-2.1"
             }
         },
         {
```

```diff
@@ -2318,8 +2325,7 @@
             "data": {
                 "gen_ai.usage.input_tokens": 1000,
                 "gen_ai.usage.output_tokens": 2000,
-                "ai.pipeline.name": "Autofix Pipeline",
-                "ai.model_id": "gpt4-21-04"
+                "gen_ai.request.model": "gpt4-21-04"
             }
         }
     ]
```

```diff
@@ -2332,25 +2338,28 @@
         &mut event,
         &NormalizationConfig {
             ai_model_costs: Some(&ModelCosts {
-                version: 1,
-                costs: vec![
-                    ModelCost {
-                        model_id: LazyGlob::new("claude-2*"),
-                        for_completion: false,
-                        cost_per_1k_tokens: 1.0,
-                    },
-                    ModelCost {
-                        model_id: LazyGlob::new("gpt4-21*"),
-                        for_completion: false,
-                        cost_per_1k_tokens: 2.0,
-                    },
-                    ModelCost {
-                        model_id: LazyGlob::new("gpt4-21*"),
-                        for_completion: true,
-                        cost_per_1k_tokens: 20.0,
-                    },
-                ],
-                models: HashMap::new(),
+                version: 2,
+                costs: vec![],
+                models: HashMap::from([
+                    (
+                        "claude-2.1".to_owned(),
+                        ModelCostV2 {
+                            input_per_token: 0.01,
+                            output_per_token: 0.02,
+                            output_reasoning_per_token: 0.03,
+                            input_cached_per_token: 0.0,
+                        },
+                    ),
+                    (
+                        "gpt4-21-04".to_owned(),
+                        ModelCostV2 {
+                            input_per_token: 0.09,
+                            output_per_token: 0.05,
+                            output_reasoning_per_token: 0.06,
+                            input_cached_per_token: 0.0,
+                        },
+                    ),
+                ]),
             }),
             ..NormalizationConfig::default()
         },
```

```diff
@@ -2364,15 +2373,15 @@
                 .and_then(|span| span.value())
                 .and_then(|span| span.data.value())
                 .and_then(|data| data.gen_ai_usage_total_cost.value()),
-            Some(&Value::F64(1.23))
+            Some(&Value::F64(140.0))
         );
         assert_eq!(
             spans
                 .get(1)
                 .and_then(|span| span.value())
                 .and_then(|span| span.data.value())
                 .and_then(|data| data.gen_ai_usage_total_cost.value()),
-            Some(&Value::F64(20.0 * 2.0 + 2.0))
+            Some(&Value::F64(190.0))
         );
         assert_eq!(
             spans
```
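
Again the expectations are the weighted token sums from the span data above: the claude-2.1 span costs 1000 × 0.01 + 2000 × 0.02 + 3000 × 0.03 + 4000 × 0.0 = 140.0, and the gpt4-21-04 span, which reports only input and output tokens, costs 1000 × 0.09 + 2000 × 0.05 = 190.0.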

relay-event-normalization/src/normalize/span/ai.rs

Lines changed: 42 additions & 52 deletions
```diff
@@ -1,43 +1,43 @@
 //! AI cost calculation.
 
-use crate::ModelCosts;
+use crate::{ModelCostV2, ModelCosts};
 use relay_event_schema::protocol::{Event, Span, SpanData};
 use relay_protocol::{Annotated, Value};
 
+/// Calculates the cost of an AI model based on the model cost and the tokens used.
 /// Calculated cost is in US dollars.
-fn calculate_ai_model_cost(
-    model_id: &str,
-    prompt_tokens_used: Option<f64>,
-    completion_tokens_used: Option<f64>,
-    total_tokens_used: Option<f64>,
-    ai_model_costs: &ModelCosts,
-) -> Option<f64> {
-    if let Some(cost_per_token) = ai_model_costs.cost_per_token(model_id) {
-        let mut result = 0.0;
-
-        // If we have separate input/output token counts, use those
-        if let Some(prompt_tokens) = prompt_tokens_used {
-            result += cost_per_token.input_per_token * prompt_tokens;
-        }
-        if let Some(completion_tokens) = completion_tokens_used {
-            result += cost_per_token.output_per_token * completion_tokens;
-        }
+fn calculate_ai_model_cost(model_cost: Option<ModelCostV2>, data: &SpanData) -> Option<f64> {
+    let cost_per_token = model_cost?;
+    let input_tokens_used = data
+        .gen_ai_usage_input_tokens
+        .value()
+        .and_then(Value::as_f64)
+        .unwrap_or(0.0);
 
-        // If we only have total tokens and no breakdown, use input cost for all tokens
-        // (assuming it's more common to have input cost defined in V1 configs)
-        if prompt_tokens_used.is_none() && completion_tokens_used.is_none() {
-            if let Some(total_tokens) = total_tokens_used {
-                if cost_per_token.input_per_token > 0.0 {
-                    result += cost_per_token.input_per_token * total_tokens;
-                } else if cost_per_token.output_per_token > 0.0 {
-                    result += cost_per_token.output_per_token * total_tokens;
-                }
-            }
-        }
+    let output_tokens_used = data
+        .gen_ai_usage_output_tokens
+        .value()
+        .and_then(Value::as_f64)
+        .unwrap_or(0.0);
+    let output_reasoning_tokens_used = data
+        .gen_ai_usage_output_tokens_reasoning
+        .value()
+        .and_then(Value::as_f64)
+        .unwrap_or(0.0);
+    let input_cached_tokens_used = data
+        .gen_ai_usage_input_tokens_cached
+        .value()
+        .and_then(Value::as_f64)
+        .unwrap_or(0.0);
 
-        return Some(result);
-    }
-    None
+    let mut result = 0.0;
+
+    result += cost_per_token.input_per_token * input_tokens_used;
+    result += cost_per_token.output_per_token * output_tokens_used;
+    result += cost_per_token.output_reasoning_per_token * output_reasoning_tokens_used;
+    result += cost_per_token.input_cached_per_token * input_cached_tokens_used;
+
+    Some(result)
 }
 
 /// Maps AI-related measurements (legacy) to span data.
```
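
A test-style sketch of the refactored function (hypothetical, not part of the commit; it assumes `SpanData` implements `Default`, while the field and type names come from this diff):

```rust
#[test]
fn cost_for_input_and_output_tokens_only() {
    // Token fields absent from the span simply contribute 0.0 to the sum.
    let data = SpanData {
        gen_ai_usage_input_tokens: Annotated::new(Value::F64(1000.0)),
        gen_ai_usage_output_tokens: Annotated::new(Value::F64(2000.0)),
        ..Default::default()
    };
    let cost = calculate_ai_model_cost(
        Some(ModelCostV2 {
            input_per_token: 0.01,
            output_per_token: 0.02,
            output_reasoning_per_token: 0.03,
            input_cached_per_token: 0.0,
        }),
        &data,
    );
    assert_eq!(cost, Some(50.0)); // 1000 * 0.01 + 2000 * 0.02
}
```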
```diff
@@ -94,27 +94,17 @@ pub fn extract_ai_data(span: &mut Span, ai_model_costs: &ModelCosts) {
         return;
     };
 
-    let total_tokens_used = data
-        .gen_ai_usage_total_tokens
-        .value()
-        .and_then(Value::as_f64);
-    let prompt_tokens_used = data
-        .gen_ai_usage_input_tokens
-        .value()
-        .and_then(Value::as_f64);
-    let completion_tokens_used = data
-        .gen_ai_usage_output_tokens
+    if let Some(model_id) = data
+        .gen_ai_request_model
         .value()
-        .and_then(Value::as_f64);
-
-    if let Some(model_id) = data.ai_model_id.value().and_then(|val| val.as_str()) {
-        if let Some(total_cost) = calculate_ai_model_cost(
-            model_id,
-            prompt_tokens_used,
-            completion_tokens_used,
-            total_tokens_used,
-            ai_model_costs,
-        ) {
+        .and_then(|val| val.as_str())
+        // xxx (vgrozdanic): temporal fallback to legacy field, until we fix
+        // sentry conventions and standardize what SDKs send
+        .or_else(|| data.ai_model_id.value().and_then(|val| val.as_str()))
+    {
+        if let Some(total_cost) =
+            calculate_ai_model_cost(ai_model_costs.cost_per_token(model_id), data)
+        {
             data.gen_ai_usage_total_cost
                 .set_value(Value::F64(total_cost).into());
         }
```
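
Note the resolution order this hunk makes explicit: the model id is read from the new `gen_ai.request.model` attribute first and only falls back to the legacy `ai.model_id` field (per the inline `xxx` comment, until SDK conventions are standardized), so spans from older SDKs keep receiving cost estimates. The model lookup also moved to the call site via `ai_model_costs.cost_per_token(model_id)`, which is what lets `calculate_ai_model_cost` become a function of just the resolved cost entry and the span data.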

relay-event-schema/src/protocol/span.rs

Lines changed: 5 additions & 0 deletions
```diff
@@ -494,6 +494,10 @@ pub struct SpanData {
     #[metastructure(field = "gen_ai.response.model")]
     pub gen_ai_response_model: Annotated<Value>,
 
+    /// The name of the GenAI model a request is being made to (e.g. gpt-4)
+    #[metastructure(field = "gen_ai.request.model")]
+    pub gen_ai_request_model: Annotated<Value>,
+
     /// The total cost for the tokens used
     #[metastructure(field = "gen_ai.usage.total_cost", legacy_alias = "ai.total_cost")]
     pub gen_ai_usage_total_cost: Annotated<Value>,
```

```diff
@@ -1286,6 +1290,7 @@ mod tests {
         gen_ai_usage_output_tokens: ~,
         gen_ai_usage_output_tokens_reasoning: ~,
         gen_ai_response_model: ~,
+        gen_ai_request_model: ~,
         gen_ai_usage_total_cost: ~,
         browser_name: ~,
         code_filepath: String(
```
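
With the field declared on `SpanData`, an incoming `gen_ai.request.model` attribute is deserialized into the typed `gen_ai_request_model` slot that the lookup in `ai.rs` above reads; the snapshot updates below are the mechanical consequence of the struct gaining one more field.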

relay-event-schema/src/protocol/span/convert.rs

Lines changed: 1 addition & 0 deletions
```diff
@@ -162,6 +162,7 @@ mod tests {
         gen_ai_usage_output_tokens: ~,
         gen_ai_usage_output_tokens_reasoning: ~,
         gen_ai_response_model: ~,
+        gen_ai_request_model: ~,
         gen_ai_usage_total_cost: ~,
         browser_name: "Chrome",
         code_filepath: ~,
```

relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap

Lines changed: 5 additions & 0 deletions
```diff
@@ -144,6 +144,7 @@ expression: "(&event.value().unwrap().spans, metrics.project_metrics)"
     gen_ai_usage_output_tokens: ~,
     gen_ai_usage_output_tokens_reasoning: ~,
     gen_ai_response_model: ~,
+    gen_ai_request_model: ~,
     gen_ai_usage_total_cost: ~,
     browser_name: ~,
     code_filepath: ~,
@@ -806,6 +807,7 @@ expression: "(&event.value().unwrap().spans, metrics.project_metrics)"
     gen_ai_usage_output_tokens: ~,
     gen_ai_usage_output_tokens_reasoning: ~,
     gen_ai_response_model: ~,
+    gen_ai_request_model: ~,
     gen_ai_usage_total_cost: ~,
     browser_name: ~,
     code_filepath: ~,
@@ -995,6 +997,7 @@ expression: "(&event.value().unwrap().spans, metrics.project_metrics)"
     gen_ai_usage_output_tokens: ~,
     gen_ai_usage_output_tokens_reasoning: ~,
     gen_ai_response_model: ~,
+    gen_ai_request_model: ~,
     gen_ai_usage_total_cost: ~,
     browser_name: ~,
     code_filepath: ~,
@@ -1293,6 +1296,7 @@ expression: "(&event.value().unwrap().spans, metrics.project_metrics)"
     gen_ai_usage_output_tokens: ~,
     gen_ai_usage_output_tokens_reasoning: ~,
     gen_ai_response_model: ~,
+    gen_ai_request_model: ~,
     gen_ai_usage_total_cost: ~,
     browser_name: ~,
     code_filepath: ~,
@@ -1482,6 +1486,7 @@ expression: "(&event.value().unwrap().spans, metrics.project_metrics)"
    gen_ai_usage_output_tokens: ~,
    gen_ai_usage_output_tokens_reasoning: ~,
    gen_ai_response_model: ~,
+   gen_ai_request_model: ~,
    gen_ai_usage_total_cost: ~,
    browser_name: ~,
    code_filepath: ~,
```
