"""Scrape Hugging Face inference-provider pricing and select cost-optimal models.

Fetches the HF inference providers page, parses the pricing table into
``hf_provider_model_costs.csv``, and exposes :func:`find_best_model` to pick a
(provider, model) pair for a prompt under several cost strategies and
capability filters.
"""

import pandas as pd

HF_INFERENCE_PAGE = "https://huggingface.co/inference/models"


def fetch_inference_page():
    """
    Fetch the HF inference providers page and return its HTML text.

    Raises:
        requests.HTTPError: on a non-2xx response (via raise_for_status).
    """
    # Imported lazily so the module can be used for CSV-based cost analysis
    # without the scraping dependency installed.
    import requests

    r = requests.get(HF_INFERENCE_PAGE, timeout=60)
    r.raise_for_status()
    return r.text


def parse_provider_costs(html_content):
    """
    Parse provider cost information from the HTML table.

    Extracts: model, provider, input_cost, output_cost, context_length,
    latency, speed, tool_calling, structured_output.

    Returns:
        list[dict]: one entry per (provider, model) table row.
    """
    # Imported lazily so the module can be used for CSV-based cost analysis
    # without the scraping dependency installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    provider_model_data = []

    # Find all table rows - try different selectors
    rows = soup.find_all('tr')
    print(f"Found {len(rows)} table rows")

    for idx, row in enumerate(rows):
        cells = row.find_all('td')
        if len(cells) >= 8:  # Need at least 8 columns for all data
            # Debug: print first few rows
            if idx < 3:
                print(f"Row {idx}: {len(cells)} cells")
                for i, cell in enumerate(cells[:9]):
                    print(f" Cell {i}: {cell.get_text(strip=True)[:50]}")
            try:
                # Column indices based on the table structure
                model_cell = cells[0].get_text(strip=True) if len(cells) > 0 else ""
                provider_cell = cells[1].get_text(strip=True) if len(cells) > 1 else ""
                input_cost_cell = cells[2].get_text(strip=True) if len(cells) > 2 else "-"
                output_cost_cell = cells[3].get_text(strip=True) if len(cells) > 3 else "-"
                context_cell = cells[4].get_text(strip=True) if len(cells) > 4 else "-"
                latency_cell = cells[5].get_text(strip=True) if len(cells) > 5 else "-"
                speed_cell = cells[6].get_text(strip=True) if len(cells) > 6 else "-"
                tool_calling_cell = cells[7].get_text(strip=True) if len(cells) > 7 else "-"
                structured_cell = cells[8].get_text(strip=True) if len(cells) > 8 else "-"

                # Extract model name (format: "org/model-name Copy to clipboard")
                model = model_cell.split("Copy to clipboard")[0].strip() if model_cell else None

                # Extract provider name (may have "cheapest" or "fastest" tags)
                provider = provider_cell.split()[0] if provider_cell else None

                if not provider or provider == '-' or not model:
                    continue

                # Parse costs (format: $0.30 or -)
                input_cost = None
                output_cost = None
                if input_cost_cell and input_cost_cell != '-':
                    try:
                        input_cost = float(input_cost_cell.replace('$', '').replace(',', '').strip())
                    except ValueError:
                        pass
                if output_cost_cell and output_cost_cell != '-':
                    try:
                        output_cost = float(output_cost_cell.replace('$', '').replace(',', '').strip())
                    except ValueError:
                        pass

                # Parse context length (number or -)
                context_length = None
                if context_cell and context_cell != '-':
                    try:
                        context_length = int(context_cell.replace(',', '').strip())
                    except ValueError:
                        pass

                # Parse latency (seconds or -)
                latency = None
                if latency_cell and latency_cell != '-':
                    try:
                        latency = float(latency_cell.strip())
                    except ValueError:
                        pass

                # Parse speed (tokens/second or -)
                speed = None
                if speed_cell and speed_cell != '-':
                    try:
                        speed = float(speed_cell.strip())
                    except ValueError:
                        pass

                # Parse tool calling support (Yes/No)
                tool_calling = tool_calling_cell if tool_calling_cell in ['Yes', 'No'] else None

                # Parse structured output support (Yes/No)
                structured_output = structured_cell if structured_cell in ['Yes', 'No'] else None

                # Add each (provider, model) pair with all metadata
                provider_model_data.append({
                    'provider': provider,
                    'model': model,
                    'input_cost_per_1M': input_cost,
                    'output_cost_per_1M': output_cost,
                    'context_length': context_length,
                    'latency_ttft': latency,
                    'speed_tokens_per_sec': speed,
                    'tool_calling': tool_calling,
                    'structured_output': structured_output
                })
            except (ValueError, IndexError) as e:
                if idx < 3:
                    print(f" Error parsing row {idx}: {e}")
                continue

    print(f"Extracted {len(provider_model_data)} provider-model entries")
    return provider_model_data


def load_provider_costs(csv_path="hf_provider_model_costs.csv"):
    """
    Load provider-model cost data from CSV file.
    """
    df = pd.read_csv(csv_path)
    return df


def find_best_model(prompt, csv_path="hf_provider_model_costs.csv",
                    strategy="cheapest_total", require_both_costs=True,
                    input_output_ratio=1.0, min_context_length=None,
                    max_latency=None, min_speed=None,
                    require_tool_calling=False, require_structured_output=False):
    """
    Find the most appropriate model for a given prompt based on cost strategy.

    Parameters:
    -----------
    prompt : str
        The user prompt (currently used for length estimation)
    csv_path : str
        Path to the CSV file with provider-model costs
    strategy : str
        Selection strategy:
        - "cheapest_total": Cheapest combined input+output cost
        - "cheapest_input": Cheapest input cost only
        - "cheapest_output": Cheapest output cost only
        - "best_value": Best value considering typical usage (uses input_output_ratio)
    require_both_costs : bool
        If True, only consider models with both input and output costs available
    input_output_ratio : float
        Expected input/output token ratio for "best_value" strategy (default 1.0)
    min_context_length : int, optional
        Minimum required context length
    max_latency : float, optional
        Maximum acceptable latency (TTFT in seconds)
    min_speed : float, optional
        Minimum required speed (tokens/second)
    require_tool_calling : bool
        If True, only return models that support tool calling
    require_structured_output : bool
        If True, only return models that support structured output

    Returns:
    --------
    dict with keys: provider, model, input_cost_per_1M, output_cost_per_1M,
    estimated_cost, strategy, context_length, latency_ttft,
    speed_tokens_per_sec, tool_calling, structured_output.
    Returns None when no entry survives the filters.
    """
    df = load_provider_costs(csv_path)

    # Filter out entries based on cost availability
    if require_both_costs:
        df_filtered = df.dropna(subset=['input_cost_per_1M', 'output_cost_per_1M']).copy()
    else:
        df_filtered = df[df['input_cost_per_1M'].notna() | df['output_cost_per_1M'].notna()].copy()

    # Apply capability filters
    if min_context_length is not None:
        df_filtered = df_filtered[df_filtered['context_length'] >= min_context_length]
    if max_latency is not None:
        df_filtered = df_filtered[(df_filtered['latency_ttft'].notna()) &
                                  (df_filtered['latency_ttft'] <= max_latency)]
    if min_speed is not None:
        df_filtered = df_filtered[(df_filtered['speed_tokens_per_sec'].notna()) &
                                  (df_filtered['speed_tokens_per_sec'] >= min_speed)]
    if require_tool_calling:
        df_filtered = df_filtered[df_filtered['tool_calling'] == 'Yes']
    if require_structured_output:
        df_filtered = df_filtered[df_filtered['structured_output'] == 'Yes']

    if df_filtered.empty:
        return None

    # Fill NaN costs with 0 for calculation purposes
    df_filtered.loc[:, 'input_cost_per_1M'] = df_filtered['input_cost_per_1M'].fillna(0)
    df_filtered.loc[:, 'output_cost_per_1M'] = df_filtered['output_cost_per_1M'].fillna(0)

    # Estimate prompt length (rough approximation: 1 word ≈ 1.3 tokens)
    prompt_tokens = len(prompt.split()) * 1.3 / 1_000_000  # Convert to millions

    # Apply selection strategy
    if strategy == "cheapest_input":
        df_filtered = df_filtered[df_filtered['input_cost_per_1M'] > 0]
        if df_filtered.empty:
            return None
        best = df_filtered.nsmallest(1, 'input_cost_per_1M').iloc[0]
        estimated_cost = best['input_cost_per_1M'] * prompt_tokens
    elif strategy == "cheapest_output":
        df_filtered = df_filtered[df_filtered['output_cost_per_1M'] > 0]
        if df_filtered.empty:
            return None
        best = df_filtered.nsmallest(1, 'output_cost_per_1M').iloc[0]
        # Assume output is similar length to input
        estimated_cost = best['output_cost_per_1M'] * prompt_tokens
    elif strategy == "best_value":
        # Calculate weighted cost based on input/output ratio
        df_filtered.loc[:, 'weighted_cost'] = (
            df_filtered['input_cost_per_1M'] * input_output_ratio +
            df_filtered['output_cost_per_1M']
        ) / (input_output_ratio + 1)
        df_filtered = df_filtered[df_filtered['weighted_cost'] > 0]
        if df_filtered.empty:
            return None
        best = df_filtered.nsmallest(1, 'weighted_cost').iloc[0]
        estimated_cost = best['weighted_cost'] * prompt_tokens * 2  # Input + output
    else:  # "cheapest_total" (default)
        df_filtered.loc[:, 'total_cost'] = df_filtered['input_cost_per_1M'] + df_filtered['output_cost_per_1M']
        df_filtered = df_filtered[df_filtered['total_cost'] > 0]
        if df_filtered.empty:
            return None
        best = df_filtered.nsmallest(1, 'total_cost').iloc[0]
        estimated_cost = best['total_cost'] * prompt_tokens

    return {
        'provider': best['provider'],
        'model': best['model'],
        'input_cost_per_1M': best['input_cost_per_1M'],
        'output_cost_per_1M': best['output_cost_per_1M'],
        'estimated_cost': estimated_cost,
        'strategy': strategy,
        'context_length': best.get('context_length'),
        'latency_ttft': best.get('latency_ttft'),
        'speed_tokens_per_sec': best.get('speed_tokens_per_sec'),
        'tool_calling': best.get('tool_calling'),
        'structured_output': best.get('structured_output')
    }


def test_find_best_model():
    """
    Test function to demonstrate find_best_model with different prompts
    and strategies.
    """
    import os

    # Check if CSV file exists
    csv_path = "hf_provider_model_costs.csv"
    if not os.path.exists(csv_path):
        print(f"\nERROR: {csv_path} not found!")
        print("Please run the script without --test flag first to fetch and generate the CSV file.")
        print("Example: python eval_provider.py")
        return

    print("\n" + "="*70)
    print("TESTING: find_best_model function")
    print("="*70)

    test_prompts = [
        "What is the capital of France?",
        "Write a detailed essay about the impact of artificial intelligence on modern society, covering economic, social, and ethical implications.",
        "Translate this text to Spanish: Hello, how are you today?"
    ]

    for i, prompt in enumerate(test_prompts, 1):
        print(f"\n{'='*70}")
        print(f"Test {i}: Prompt length = {len(prompt)} chars, ~{len(prompt.split())} words")
        print(f"Prompt: {prompt[:60]}...")
        print(f"{'='*70}")

        strategies = ["cheapest_total", "cheapest_input", "cheapest_output", "best_value"]
        for strategy in strategies:
            result = find_best_model(prompt, strategy=strategy, require_both_costs=True)
            if result:
                print(f"\n Strategy: {strategy.upper()}")
                print(f" Provider: {result['provider']}")
                print(f" Model: {result['model']}")
                print(f" Input cost: ${result['input_cost_per_1M']:.4f}/1M tokens")
                print(f" Output cost: ${result['output_cost_per_1M']:.4f}/1M tokens")
                print(f" Context length: {result['context_length'] if result['context_length'] else 'N/A'}")
                print(f" Latency (TTFT): {result['latency_ttft']:.2f}s" if result['latency_ttft'] else " Latency: N/A")
                print(f" Speed: {result['speed_tokens_per_sec']:.0f} tok/s" if result['speed_tokens_per_sec'] else " Speed: N/A")
                print(f" Tool calling: {result['tool_calling'] or 'N/A'}")
                print(f" Structured output: {result['structured_output'] or 'N/A'}")
                print(f" Estimated cost: ${result['estimated_cost']:.8f}")
            else:
                print(f"\n Strategy: {strategy.upper()}")
                print(f" No suitable model found")

    # Test with require_both_costs=False
    print(f"\n{'='*70}")
    print("Test: Allow models with partial cost information")
    print(f"{'='*70}")
    result = find_best_model(test_prompts[0], strategy="cheapest_total", require_both_costs=False)
    if result:
        print(f" Provider: {result['provider']}")
        print(f" Model: {result['model']}")
        print(f" Input cost: ${result['input_cost_per_1M']:.4f}/1M tokens")
        print(f" Output cost: ${result['output_cost_per_1M']:.4f}/1M tokens")

    # Test with capability filters
    print(f"\n{'='*70}")
    print("Test: Filter by capabilities (tool calling + structured output)")
    print(f"{'='*70}")
    result = find_best_model(
        test_prompts[0],
        strategy="cheapest_total",
        require_tool_calling=True,
        require_structured_output=True,
        min_context_length=100000
    )
    if result:
        print(f" Provider: {result['provider']}")
        print(f" Model: {result['model']}")
        print(f" Context length: {result['context_length']}")
        print(f" Tool calling: {result['tool_calling']}")
        print(f" Structured output: {result['structured_output']}")
        print(f" Input cost: ${result['input_cost_per_1M']:.4f}/1M tokens")
        print(f" Output cost: ${result['output_cost_per_1M']:.4f}/1M tokens")
    else:
        print(" No models found matching criteria")


def main():
    """Fetch, parse, and save provider costs, then run the demo tests.

    With ``--test`` as the first CLI argument, skip fetching and only run
    test_find_best_model against an existing CSV.
    """
    import sys

    # Check if we should skip fetching and just run tests
    if len(sys.argv) > 1 and sys.argv[1] == "--test":
        test_find_best_model()
        return

    print("Fetching inference providers page...")
    html_content = fetch_inference_page()

    print("Parsing provider costs...")
    provider_data = parse_provider_costs(html_content)

    if not provider_data:
        print("No provider cost data found.")
        return

    df = pd.DataFrame(provider_data)
    df = df.sort_values(["provider", "model"])
    df.to_csv("hf_provider_model_costs.csv", index=False)

    print(f"\nFound {len(df)} provider-model combinations")
    unique_providers = df['provider'].nunique()
    unique_models = df['model'].nunique()
    print(f"Unique providers: {unique_providers}")
    print(f"Unique models: {unique_models}")

    print("\nProvider-Model Cost Summary (first 20 rows):")
    print(df.head(20).to_string(index=False))

    # Run comprehensive tests
    test_find_best_model()


if __name__ == "__main__":
    main()