arterm-sedov committed on
Commit
9f00a7e
·
1 Parent(s): 97edb29

Refactor pricing calculations to use interquartile mean for OpenRouter models

Browse files

- Updated the pricing logic in `LLMManager` and `openrouter_pricing.py` to calculate interquartile mean pricing across endpoints, improving accuracy by reducing the impact of outliers.
- Adjusted related documentation to reflect the change from median to interquartile mean pricing methodology, ensuring clarity on pricing sources and calculations.
- Modified JSON pricing data for several models to align with the new interquartile mean approach.

agent_ng/llm_manager.py CHANGED
@@ -227,13 +227,13 @@ class LLMManager:
227
  if api_key:
228
  base_url = os.getenv(config.api_base_env or "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
229
  self._log_initialization(
230
- f"Fetching pricing via endpoints API for {len(model_names)} OpenRouter models (using median pricing)...", "INFO"
231
  )
232
 
233
  # Import here to avoid circular dependency
234
  from agent_ng.utils.openrouter_pricing import fetch_pricing_via_endpoints
235
 
236
- # Fetch pricing using /endpoints API and use median across endpoints
237
  pricing_map = fetch_pricing_via_endpoints(model_names, api_key, base_url)
238
  if pricing_map:
239
  pricing_source = "API"
 
227
  if api_key:
228
  base_url = os.getenv(config.api_base_env or "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
229
  self._log_initialization(
230
+ f"Fetching pricing via endpoints API for {len(model_names)} OpenRouter models (using interquartile mean pricing)...", "INFO"
231
  )
232
 
233
  # Import here to avoid circular dependency
234
  from agent_ng.utils.openrouter_pricing import fetch_pricing_via_endpoints
235
 
236
+ # Fetch pricing using /endpoints API and use interquartile mean across endpoints
237
  pricing_map = fetch_pricing_via_endpoints(model_names, api_key, base_url)
238
  if pricing_map:
239
  pricing_source = "API"
agent_ng/openrouter_pricing.json CHANGED
@@ -1,27 +1,27 @@
1
  {
2
  "deepseek/deepseek-v3.1-terminus:exacto": {
3
- "prompt_price_per_1k": 0.00025,
4
- "completion_price_per_1k": 0.00095
5
  },
6
  "deepseek/deepseek-v3.1-terminus": {
7
- "prompt_price_per_1k": 0.00025,
8
- "completion_price_per_1k": 0.00095
9
  },
10
  "deepseek/deepseek-v3.2-speciale": {
11
- "prompt_price_per_1k": 0.00028000000000000003,
12
- "completion_price_per_1k": 0.00041
13
  },
14
  "deepseek/deepseek-chat-v3.1:free": {
15
- "prompt_price_per_1k": 0.00027,
16
- "completion_price_per_1k": 0.001
17
  },
18
  "deepseek/deepseek-chat-v3.1": {
19
- "prompt_price_per_1k": 0.00027,
20
- "completion_price_per_1k": 0.001
21
  },
22
  "deepseek/deepseek-r1-0528": {
23
- "prompt_price_per_1k": 0.0007999999999999999,
24
- "completion_price_per_1k": 0.0025
25
  },
26
  "x-ai/grok-4-fast:free": {
27
  "prompt_price_per_1k": 0.00019999999999999998,
@@ -36,12 +36,12 @@
36
  "completion_price_per_1k": 0.0015
37
  },
38
  "qwen/qwen3-coder:free": {
39
- "prompt_price_per_1k": 0.00039999999999999996,
40
- "completion_price_per_1k": 0.0017
41
  },
42
  "qwen/qwen3-coder": {
43
- "prompt_price_per_1k": 0.00039999999999999996,
44
- "completion_price_per_1k": 0.0017
45
  },
46
  "qwen/qwen3-coder-flash": {
47
  "prompt_price_per_1k": 0.0003,
@@ -56,36 +56,36 @@
56
  "completion_price_per_1k": 0.005
57
  },
58
  "qwen/qwen3-coder:exacto": {
59
- "prompt_price_per_1k": 0.00039999999999999996,
60
- "completion_price_per_1k": 0.0017
61
  },
62
  "qwen/qwen-plus-2025-07-28": {
63
  "prompt_price_per_1k": 0.00039999999999999996,
64
  "completion_price_per_1k": 0.0012
65
  },
66
  "moonshotai/kimi-k2-0905:exacto": {
67
- "prompt_price_per_1k": 0.0006,
68
- "completion_price_per_1k": 0.0025
69
  },
70
  "moonshotai/kimi-k2-0905": {
71
- "prompt_price_per_1k": 0.0006,
72
- "completion_price_per_1k": 0.0025
73
  },
74
  "moonshotai/kimi-k2-thinking": {
75
- "prompt_price_per_1k": 0.0006,
76
  "completion_price_per_1k": 0.0025
77
  },
78
  "z-ai/glm-4.6:exacto": {
79
- "prompt_price_per_1k": 0.00055,
80
- "completion_price_per_1k": 0.0021899999999999997
81
  },
82
  "z-ai/glm-4.6": {
83
- "prompt_price_per_1k": 0.00055,
84
- "completion_price_per_1k": 0.0021899999999999997
85
  },
86
  "z-ai/glm-4.7": {
87
- "prompt_price_per_1k": 0.00050375,
88
- "completion_price_per_1k": 0.0020499999999999997
89
  },
90
  "google/gemini-3-flash-preview": {
91
  "prompt_price_per_1k": 0.0005,
@@ -96,16 +96,16 @@
96
  "completion_price_per_1k": 0.012
97
  },
98
  "anthropic/claude-sonnet-4.5": {
99
- "prompt_price_per_1k": 0.003,
100
  "completion_price_per_1k": 0.015000000000000001
101
  },
102
  "openai/gpt-oss-120b:exacto": {
103
- "prompt_price_per_1k": 9.999999999999999e-05,
104
- "completion_price_per_1k": 0.0006
105
  },
106
  "openai/gpt-oss-120b": {
107
- "prompt_price_per_1k": 9.999999999999999e-05,
108
- "completion_price_per_1k": 0.0006
109
  },
110
  "openai/gpt-5-mini": {
111
  "prompt_price_per_1k": 0.00025,
@@ -124,7 +124,7 @@
124
  "completion_price_per_1k": 0.0009
125
  },
126
  "minimax/minimax-m2.1": {
127
- "prompt_price_per_1k": 0.0003,
128
  "completion_price_per_1k": 0.0012
129
  }
130
  }
 
1
  {
2
  "deepseek/deepseek-v3.1-terminus:exacto": {
3
+ "prompt_price_per_1k": 0.000245,
4
+ "completion_price_per_1k": 0.000925
5
  },
6
  "deepseek/deepseek-v3.1-terminus": {
7
+ "prompt_price_per_1k": 0.000245,
8
+ "completion_price_per_1k": 0.000925
9
  },
10
  "deepseek/deepseek-v3.2-speciale": {
11
+ "prompt_price_per_1k": 0.00031666666666666665,
12
+ "completion_price_per_1k": 0.0006699999999999999
13
  },
14
  "deepseek/deepseek-chat-v3.1:free": {
15
+ "prompt_price_per_1k": 0.00034500000000000004,
16
+ "completion_price_per_1k": 0.001125
17
  },
18
  "deepseek/deepseek-chat-v3.1": {
19
+ "prompt_price_per_1k": 0.00034500000000000004,
20
+ "completion_price_per_1k": 0.001125
21
  },
22
  "deepseek/deepseek-r1-0528": {
23
+ "prompt_price_per_1k": 0.0012642857142857143,
24
+ "completion_price_per_1k": 0.003375714285714286
25
  },
26
  "x-ai/grok-4-fast:free": {
27
  "prompt_price_per_1k": 0.00019999999999999998,
 
36
  "completion_price_per_1k": 0.0015
37
  },
38
  "qwen/qwen3-coder:free": {
39
+ "prompt_price_per_1k": 0.0004875,
40
+ "completion_price_per_1k": 0.00166625
41
  },
42
  "qwen/qwen3-coder": {
43
+ "prompt_price_per_1k": 0.0004875,
44
+ "completion_price_per_1k": 0.00166625
45
  },
46
  "qwen/qwen3-coder-flash": {
47
  "prompt_price_per_1k": 0.0003,
 
56
  "completion_price_per_1k": 0.005
57
  },
58
  "qwen/qwen3-coder:exacto": {
59
+ "prompt_price_per_1k": 0.0004875,
60
+ "completion_price_per_1k": 0.00166625
61
  },
62
  "qwen/qwen-plus-2025-07-28": {
63
  "prompt_price_per_1k": 0.00039999999999999996,
64
  "completion_price_per_1k": 0.0012
65
  },
66
  "moonshotai/kimi-k2-0905:exacto": {
67
+ "prompt_price_per_1k": 0.0006557142857142856,
68
+ "completion_price_per_1k": 0.0026414285714285715
69
  },
70
  "moonshotai/kimi-k2-0905": {
71
+ "prompt_price_per_1k": 0.0006557142857142856,
72
+ "completion_price_per_1k": 0.0026414285714285715
73
  },
74
  "moonshotai/kimi-k2-thinking": {
75
+ "prompt_price_per_1k": 0.0005928571428571429,
76
  "completion_price_per_1k": 0.0025
77
  },
78
  "z-ai/glm-4.6:exacto": {
79
+ "prompt_price_per_1k": 0.0005153571428571428,
80
+ "completion_price_per_1k": 0.0020628571428571428
81
  },
82
  "z-ai/glm-4.6": {
83
+ "prompt_price_per_1k": 0.0005153571428571428,
84
+ "completion_price_per_1k": 0.0020628571428571428
85
  },
86
  "z-ai/glm-4.7": {
87
+ "prompt_price_per_1k": 0.0005034375,
88
+ "completion_price_per_1k": 0.002085
89
  },
90
  "google/gemini-3-flash-preview": {
91
  "prompt_price_per_1k": 0.0005,
 
96
  "completion_price_per_1k": 0.012
97
  },
98
  "anthropic/claude-sonnet-4.5": {
99
+ "prompt_price_per_1k": 0.0030000000000000005,
100
  "completion_price_per_1k": 0.015000000000000001
101
  },
102
  "openai/gpt-oss-120b:exacto": {
103
+ "prompt_price_per_1k": 0.00012,
104
+ "completion_price_per_1k": 0.0005236363636363636
105
  },
106
  "openai/gpt-oss-120b": {
107
+ "prompt_price_per_1k": 0.00012,
108
+ "completion_price_per_1k": 0.0005236363636363636
109
  },
110
  "openai/gpt-5-mini": {
111
  "prompt_price_per_1k": 0.00025,
 
124
  "completion_price_per_1k": 0.0009
125
  },
126
  "minimax/minimax-m2.1": {
127
+ "prompt_price_per_1k": 0.00029857142857142853,
128
  "completion_price_per_1k": 0.0012
129
  }
130
  }
agent_ng/utils/openrouter_pricing.py CHANGED
@@ -139,17 +139,18 @@ def extract_pricing_from_model(model: dict[str, Any]) -> tuple[float, float]:
139
  return (prompt_per_1k, completion_per_1k)
140
 
141
 
142
- def median_endpoint_pricing(endpoints: list[dict[str, Any]]) -> tuple[float, float]:
143
- """Calculate median pricing across multiple endpoints.
144
 
145
- Uses median instead of average to be less affected by outliers and better
146
- reflect typical costs users experience.
 
147
 
148
  Args:
149
  endpoints: List of endpoint dictionaries
150
 
151
  Returns:
152
- Tuple of (median_prompt_price_per_1k, median_completion_price_per_1k) in USD
153
  """
154
  if not endpoints:
155
  return (0.0, 0.0)
@@ -171,23 +172,31 @@ def median_endpoint_pricing(endpoints: list[dict[str, Any]]) -> tuple[float, flo
171
  if completion_per_token > 0:
172
  completion_prices.append(completion_per_token * 1000.0) # Convert per token to per 1K
173
 
174
- # Calculate median (middle value when sorted)
175
- def _median(values: list[float]) -> float:
176
  if not values:
177
  return 0.0
 
 
 
 
178
  sorted_values = sorted(values)
179
  n = len(sorted_values)
180
- if n % 2 == 0:
181
- # Even number of values: average of two middle values
182
- return (sorted_values[n // 2 - 1] + sorted_values[n // 2]) / 2.0
183
- else:
184
- # Odd number of values: middle value
185
- return sorted_values[n // 2]
186
 
187
- median_prompt = _median(prompt_prices)
188
- median_completion = _median(completion_prices)
 
 
 
 
 
 
 
 
 
 
189
 
190
- return (median_prompt, median_completion)
191
 
192
 
193
  def parse_model_slug(model_slug: str) -> tuple[str | None, str | None]:
@@ -324,8 +333,8 @@ def fetch_pricing_via_endpoints(
324
  logger.debug("No endpoints found for %s/%s", author, model_slug)
325
  continue
326
 
327
- # Median pricing across endpoints
328
- prompt_price, completion_price = median_endpoint_pricing(endpoints)
329
  if prompt_price > 0 or completion_price > 0:
330
  pricing_map[model_name] = {
331
  "prompt_price_per_1k": prompt_price,
@@ -415,10 +424,10 @@ def fetch_pricing_for_models(
415
  if author and model_slug:
416
  endpoints = fetch_model_endpoints(author, model_slug, api_key, base_url)
417
  if endpoints:
418
- prompt_price, completion_price = median_endpoint_pricing(endpoints)
419
  if prompt_price > 0 or completion_price > 0:
420
  logger.info(
421
- "Model %s: prompt=$%.6f/1K, completion=$%.6f/1K (median from %d endpoints)",
422
  slug,
423
  prompt_price,
424
  completion_price,
@@ -498,10 +507,10 @@ def update_llm_config_with_pricing(
498
  if author and model_slug:
499
  endpoints = fetch_model_endpoints(author, model_slug, api_key, base_url)
500
  if endpoints:
501
- prompt_price, completion_price = median_endpoint_pricing(endpoints)
502
  if prompt_price > 0 or completion_price > 0:
503
  logger.info(
504
- "Model %s: prompt=$%.6f/1K, completion=$%.6f/1K (median from %d endpoints)",
505
  slug,
506
  prompt_price,
507
  completion_price,
@@ -609,7 +618,7 @@ def main() -> None:
609
  sys.exit(1)
610
 
611
  model_names = [m.get("model", "") for m in config.models if m.get("model")]
612
- logger.info("Fetching pricing via endpoints API for %d models (using median pricing)...", len(model_names))
613
  pricing_map = fetch_pricing_via_endpoints(model_names, api_key, base_url)
614
  except Exception as exc: # pragma: no cover - CLI helper only
615
  logger.exception("Failed to fetch pricing: %s", exc)
 
139
  return (prompt_per_1k, completion_per_1k)
140
 
141
 
142
+ def interquartile_mean_endpoint_pricing(endpoints: list[dict[str, Any]]) -> tuple[float, float]:
143
+ """Calculate interquartile mean pricing across multiple endpoints.
144
 
145
+ Uses interquartile mean (average of 25th-75th percentile) to remove outliers
146
+ on both ends while better reflecting typical costs users experience.
147
+ More robust than average, more representative than median.
148
 
149
  Args:
150
  endpoints: List of endpoint dictionaries
151
 
152
  Returns:
153
+ Tuple of (iq_mean_prompt_price_per_1k, iq_mean_completion_price_per_1k) in USD
154
  """
155
  if not endpoints:
156
  return (0.0, 0.0)
 
172
  if completion_per_token > 0:
173
  completion_prices.append(completion_per_token * 1000.0) # Convert per token to per 1K
174
 
175
+ def _interquartile_mean(values: list[float]) -> float:
176
+ """Calculate interquartile mean (average of 25th-75th percentile)."""
177
  if not values:
178
  return 0.0
179
+ if len(values) <= 2:
180
+ # Too few values, just return average
181
+ return sum(values) / len(values)
182
+
183
  sorted_values = sorted(values)
184
  n = len(sorted_values)
 
 
 
 
 
 
185
 
186
+ # Calculate quartile indices
187
+ q1_idx = n // 4
188
+ q3_idx = (3 * n) // 4
189
+
190
+ # Get interquartile range (25th to 75th percentile)
191
+ iq_range = sorted_values[q1_idx:q3_idx + 1]
192
+
193
+ # Return average of interquartile range
194
+ return sum(iq_range) / len(iq_range)
195
+
196
+ iq_mean_prompt = _interquartile_mean(prompt_prices)
197
+ iq_mean_completion = _interquartile_mean(completion_prices)
198
 
199
+ return (iq_mean_prompt, iq_mean_completion)
200
 
201
 
202
  def parse_model_slug(model_slug: str) -> tuple[str | None, str | None]:
 
333
  logger.debug("No endpoints found for %s/%s", author, model_slug)
334
  continue
335
 
336
+ # Interquartile mean pricing across endpoints (removes outliers)
337
+ prompt_price, completion_price = interquartile_mean_endpoint_pricing(endpoints)
338
  if prompt_price > 0 or completion_price > 0:
339
  pricing_map[model_name] = {
340
  "prompt_price_per_1k": prompt_price,
 
424
  if author and model_slug:
425
  endpoints = fetch_model_endpoints(author, model_slug, api_key, base_url)
426
  if endpoints:
427
+ prompt_price, completion_price = interquartile_mean_endpoint_pricing(endpoints)
428
  if prompt_price > 0 or completion_price > 0:
429
  logger.info(
430
+ "Model %s: prompt=$%.6f/1K, completion=$%.6f/1K (interquartile mean from %d endpoints)",
431
  slug,
432
  prompt_price,
433
  completion_price,
 
507
  if author and model_slug:
508
  endpoints = fetch_model_endpoints(author, model_slug, api_key, base_url)
509
  if endpoints:
510
+ prompt_price, completion_price = interquartile_mean_endpoint_pricing(endpoints)
511
  if prompt_price > 0 or completion_price > 0:
512
  logger.info(
513
+ "Model %s: prompt=$%.6f/1K, completion=$%.6f/1K (interquartile mean from %d endpoints)",
514
  slug,
515
  prompt_price,
516
  completion_price,
 
618
  sys.exit(1)
619
 
620
  model_names = [m.get("model", "") for m in config.models if m.get("model")]
621
+ logger.info("Fetching pricing via endpoints API for %d models (using interquartile mean pricing)...", len(model_names))
622
  pricing_map = fetch_pricing_via_endpoints(model_names, api_key, base_url)
623
  except Exception as exc: # pragma: no cover - CLI helper only
624
  logger.exception("Failed to fetch pricing: %s", exc)
docs/OPENROUTER_PRICING.md CHANGED
@@ -4,7 +4,7 @@
4
 
5
  - **Pricing source**: For OpenRouter models we use the `/endpoints` API
6
  [`GET /models/{author}/{slug}/endpoints`](https://openrouter.ai/docs/api/api-reference/endpoints/list-endpoints)
7
- which provides endpoint-specific pricing. We use **median pricing** across all endpoints for each model to get realistic pricing (less affected by outliers than average).
8
  - **API format**: OpenRouter API returns prices **per token** (e.g., `"0.00003"` = $0.00003 per token).
9
  We convert to per 1K tokens: `price_per_1k = price_per_token * 1000`
10
  - **When**: Once per agent run, at startup, inside `LLMManager` (if enabled)
@@ -20,7 +20,8 @@
20
  3. **Fallback chain** (tries each in order until pricing is found):
21
  - **Step 1: API fetch** (if enabled):
22
  - Fetches endpoints for each configured model from `/models/{author}/{slug}/endpoints`
23
- - Uses median pricing across all endpoints for each model (less affected by outliers)
 
24
  - Updates model configs in memory: `prompt_price_per_1k`, `completion_price_per_1k`
25
  - **Step 2: JSON snapshot** (if API fails or disabled):
26
  - Loads pricing from `agent_ng/openrouter_pricing.json` (if exists)
 
4
 
5
  - **Pricing source**: For OpenRouter models we use the `/endpoints` API
6
  [`GET /models/{author}/{slug}/endpoints`](https://openrouter.ai/docs/api/api-reference/endpoints/list-endpoints)
7
+ which provides endpoint-specific pricing. We use the **interquartile mean** (the average of values between the 25th and 75th percentiles) across all endpoints for each model to get realistic pricing that removes outliers on both ends while better reflecting typical costs.
8
  - **API format**: OpenRouter API returns prices **per token** (e.g., `"0.00003"` = $0.00003 per token).
9
  We convert to per 1K tokens: `price_per_1k = price_per_token * 1000`
10
  - **When**: Once per agent run, at startup, inside `LLMManager` (if enabled)
 
20
  3. **Fallback chain** (tries each in order until pricing is found):
21
  - **Step 1: API fetch** (if enabled):
22
  - Fetches endpoints for each configured model from `/models/{author}/{slug}/endpoints`
23
+ - Uses interquartile mean pricing (the average of values between the 25th and 75th percentiles) across all endpoints for each model
24
+ - Removes outliers on both ends while reflecting typical costs
25
  - Updates model configs in memory: `prompt_price_per_1k`, `completion_price_per_1k`
26
  - **Step 2: JSON snapshot** (if API fails or disabled):
27
  - Loads pricing from `agent_ng/openrouter_pricing.json` (if exists)