"""
LLM Playtester - Automated game playtesting using AI models.

NOTE: Future Enhancement - Media-Aware Playtesting
Currently, the LLM playtester only sees text (descriptions, choices, game state).
It does NOT receive or process media files referenced in config states.

Future local models to explore for media-aware playtesting:
- Vision-Language Models (VLMs): LLaVA, Qwen2-VL, InternVL, MiniCPM-V, PaliGemma
  Could describe what's in scene images, evaluate visual consistency
- Audio understanding: Whisper (local) + LLM for ambient audio analysis
- Multimodal game testing: Feed image+text to local VLM, ask
  "does this image match the description?"
- HuggingFace Spaces: Qwen/Qwen2-VL, OpenGVLab/InternVL2, openbmb/MiniCPM-V-2_6

Integration approach when ready:
1. Extract media paths from current state
   (already available via get_current_state_media())
2. For images: encode as base64 or use VLM API
3. Add to build_playtest_prompt():
   "Scene visuals: [image description or VLM analysis]"
4. Immersionist perspective would especially benefit from visual context
"""

import json
import re
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass, field

import spaces

# Playtest perspectives - shortened versions for local LLM (token efficient).
# Each entry supplies the display name, the system prompt, and the per-step
# instruction that is embedded in the user prompt.
PLAYTEST_PERSPECTIVES = {
    "default": {
        "name": "Default Playtester",
        "system": "You are a game playtester. Follow instructions exactly.",
        "instruction": "Pick the most interesting choice to explore the game."
    },
    "completionist": {
        "name": "The Completionist",
        "system": "You are a completionist player who wants to see ALL content. You methodically explore every option.",
        "instruction": "Pick a choice you haven't tried yet. If stuck in a loop, try something different. Note if you feel content is missing."
    },
    "story_lover": {
        "name": "The Story Lover",
        "system": "You care deeply about narrative. You notice plot holes, character inconsistencies, and emotional beats.",
        "instruction": "Pick the choice that advances the story most interestingly. Note any narrative problems or inconsistencies."
    },
    "skeptic": {
        "name": "The Skeptic",
        "system": "You question everything. You notice when choices don't make sense or feel forced.",
        "instruction": "Pick a choice, but note if it feels illogical or forced. Ask 'why would I do this?' for each option."
    },
    "speedrunner": {
        "name": "The Speed Runner",
        "system": "You want to finish fast. You skip flavor text and notice padding or filler content.",
        "instruction": "Pick the most direct choice. Note if the game forces unnecessary steps or feels padded."
    },
    "immersionist": {
        "name": "The Immersionist",
        "system": "You want to feel immersed. You notice thin descriptions, mood breaks, and atmosphere issues.",
        "instruction": "Pick what feels most natural. Note if descriptions are weak or atmosphere breaks."
    },
    "first_timer": {
        "name": "The First-Timer",
        "system": "You're new to games and easily confused. You notice unclear directions and assumed knowledge.",
        "instruction": "Pick what seems clearest. Note anything confusing or unexplained. Express confusion openly."
    },
    "replayer": {
        "name": "The Replayer",
        "system": "You've played before and want variety. You notice when different choices lead to same outcomes.",
        "instruction": "Pick something different from the obvious path. Note if choices feel meaningless or repetitive."
    },
    "edge_finder": {
        "name": "The Edge-Case Finder",
        "system": "You try to break games. You make unusual choices and look for contradictions or bugs.",
        "instruction": "Pick the weirdest or least expected choice. Try to find loops, contradictions, or broken states."
    }
}

# Lazy torch import - only load when needed
_torch = None


def _ensure_torch():
    """Import torch on first use, cache it in the module-level `_torch`, and return it."""
    global _torch
    if _torch is None:
        import torch
        _torch = torch
    return _torch


# Import game engine components
from my_text_game_engine_attempt import GameSession, sanitize_config_for_serialization
from game_state import GameState
import my_text_game_engine_attempt as game_engine

# Import the model loading infrastructure
import leveraging_machine_learning as ml

# Patterns used to pull the structured fields out of the model's reply.
_CHOICE_RE = re.compile(r'Choice:\s*(\d+)', re.IGNORECASE)
_REASON_RE = re.compile(r'Reason:\s*(.+?)(?:\n|Issue:|$)', re.IGNORECASE | re.DOTALL)
_ISSUE_RE = re.compile(r'Issue:\s*(.+?)(?:\n|$)', re.IGNORECASE | re.DOTALL)
_ANY_NUMBER_RE = re.compile(r'\b(\d+)\b')

# The prompt shows the placeholder as `[any problem noticed, or "none"]`, so
# models frequently echo quotes, brackets or a trailing period around "none".
_ISSUE_WRAPPER_CHARS = "\"'`[]().!*"
_NO_ISSUE_MARKERS = frozenset({"none", "n/a"})


def _truncate(text: str, limit: int) -> str:
    """Return `text` cut to `limit` characters, adding "..." only when it was actually cut."""
    return text[:limit] + "..." if len(text) > limit else text


def _is_no_issue(issue_text: str) -> bool:
    """Return True when `issue_text` is empty or just a "none"/"n/a" marker, ignoring wrapper punctuation."""
    normalized = issue_text.strip().strip(_ISSUE_WRAPPER_CHARS).strip().lower()
    return not normalized or normalized in _NO_ISSUE_MARKERS


@dataclass
class PlaythroughStep:
    """Record of a single step in the playthrough."""
    step_number: int
    location: str
    state: str
    description: str
    available_choices: List[str]
    choice_made: str
    reasoning: str
    state_changes: Dict = field(default_factory=dict)


@dataclass
class PlaythroughResult:
    """Complete playthrough result."""
    steps: List[PlaythroughStep] = field(default_factory=list)
    states_visited: List[str] = field(default_factory=list)
    issues_found: List[str] = field(default_factory=list)
    final_state: Dict = field(default_factory=dict)
    completed: bool = False
    error: Optional[str] = None


def build_playtest_prompt(description: str, choices: List[str], game_state, perspective: str = "default") -> str:
    """Build the prompt for the LLM to make a choice.

    Args:
        description: Scene text for the current state.
        choices: Choice labels; they are numbered 1..N in the prompt.
        game_state: Object exposing `inventory`, `flags`, `money` and
            `current_location`, all of which are read here.
        perspective: Key into PLAYTEST_PERSPECTIVES; unknown keys fall back
            to "default".

    Returns:
        The full user prompt, ending with "Your response:".
    """
    choices_text = "\n".join(f"{i + 1}. {choice}" for i, choice in enumerate(choices))
    inventory = ", ".join(game_state.inventory) if game_state.inventory else "(empty)"
    flags_active = [k for k, v in game_state.flags.items() if v]
    flags_text = ", ".join(flags_active) if flags_active else "(none)"

    # Get perspective-specific instruction
    persp = PLAYTEST_PERSPECTIVES.get(perspective, PLAYTEST_PERSPECTIVES["default"])
    instruction = persp["instruction"]

    prompt = f"""You are playtesting a text adventure game. Select ONE choice and explain briefly.

Current Location: {game_state.current_location}
Scene: {description}

Available choices:
{choices_text}

Your current state:
- Inventory: {inventory}
- Money: {game_state.money}
- Active flags: {flags_text}

Instructions: {instruction}

Reply with ONLY:
Choice: [number]
Reason: [one sentence explanation]
Issue: [any problem noticed, or "none"]

Your response:"""
    return prompt


def parse_llm_choice(response: str, num_choices: int) -> Tuple[int, str, Optional[str]]:
    """Parse the LLM response to extract choice number, reasoning, and any issues noted.

    Args:
        response: Raw text generated by the model.
        num_choices: Number of choices that were offered; valid picks are
            1..num_choices.

    Returns:
        A tuple `(index, reasoning, issue)` where `index` is zero-based,
        `reasoning` is the parsed reason or a placeholder string, and `issue`
        is the reported problem or None. "none"/"n/a" answers count as no
        issue, including when wrapped in quotes, brackets or trailing
        punctuation (e.g. `"none"`, `[none]`, `None.`).
    """
    choice_match = _CHOICE_RE.search(response)
    reason_match = _REASON_RE.search(response)
    issue_match = _ISSUE_RE.search(response)

    # Extract issue if present and not a "none" marker
    issue = None
    if issue_match:
        issue_text = issue_match.group(1).strip()
        if not _is_no_issue(issue_text):
            issue = issue_text

    if choice_match:
        choice_num = int(choice_match.group(1))
        if 1 <= choice_num <= num_choices:
            reasoning = reason_match.group(1).strip() if reason_match else "No reasoning provided"
            return choice_num - 1, reasoning, issue

    # Fallback: take the first number anywhere in the response that is in range
    for num_str in _ANY_NUMBER_RE.findall(response):
        num = int(num_str)
        if 1 <= num <= num_choices:
            return num - 1, f"Chose option {num}", issue

    return 0, "Defaulting to first choice", issue


def format_playthrough_log(result: PlaythroughResult) -> str:
    """Format playthrough result as readable log.

    Returns "ERROR: ..." when `result.error` is set; otherwise one entry per
    step followed by totals and, if present, the final state.
    """
    if result.error:
        return f"ERROR: {result.error}"

    lines = ["=== Playthrough Log ===\n"]

    for step in result.steps:
        lines.append(f"Step {step.step_number}: {step.location}/{step.state}")
        # Only mark the scene with an ellipsis when it was really shortened.
        lines.append(f"  Scene: {_truncate(step.description, 80)}")
        lines.append(f"  Choices: {len(step.available_choices)} available")
        lines.append(f"  Chose: '{step.choice_made}'")
        lines.append(f"  Reason: {step.reasoning}")
        lines.append("")

    lines.append(f"Total steps: {len(result.steps)}")
    lines.append(f"Unique states: {len(set(result.states_visited))}")
    lines.append(f"Completed: {'Yes' if result.completed else 'No'}")

    if result.final_state:
        lines.append("\nFinal State:")
        lines.append(f"  Location: {result.final_state.get('location')}/{result.final_state.get('state')}")
        lines.append(f"  Inventory: {result.final_state.get('inventory', [])}")
        lines.append(f"  Money: {result.final_state.get('money', 0)}")

    return "\n".join(lines)


def format_issues(result: PlaythroughResult) -> str:
    """Format issues found during playthrough.

    Returns "ERROR: ..." when `result.error` is set, a fixed "no issues"
    message when the list is empty, otherwise a numbered list.
    """
    if result.error:
        return f"ERROR: {result.error}"

    if not result.issues_found:
        return "No issues found during playthrough."

    lines = ["=== Issues Found ===\n"]
    lines.extend(f"{i}. {issue}" for i, issue in enumerate(result.issues_found, 1))
    return "\n".join(lines)


def _generate_response(system_prompt: str, prompt: str, device) -> str:
    """Run one chat-style generation on the loaded model and return only the newly generated text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]
    text = ml.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = ml.tokenizer([text], return_tensors="pt").to(device)

    generated_ids = ml.model.generate(
        model_inputs.input_ids,
        max_new_tokens=150,
        temperature=0.7,
        do_sample=True
    )
    # Drop the prompt tokens so that only the continuation is decoded.
    new_token_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return ml.tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]


@spaces.GPU
def run_llm_playtest(config_json: str, model_name: str = "unsloth/Llama-3.2-1B-Instruct",
                     max_steps: int = 20, perspective: str = "default") -> Tuple[str, str]:
    """
    Run LLM-powered playtest of a game config.

    Args:
        config_json: The game config JSON string
        model_name: Name of the LLM model to use; only passed to
            `ml.load_model` when no model/tokenizer is loaded yet
        max_steps: Maximum number of steps to take
        perspective: Playtest perspective (default, completionist, story_lover, etc.)

    Returns:
        A `(log, issues)` pair of formatted strings. On invalid input or a
        failed game initialisation, a pair of short error/hint strings is
        returned instead.
    """
    if not config_json or not config_json.strip():
        return "No config provided", "Please provide a game config JSON"

    # Parse config
    try:
        config = json.loads(config_json)
    except json.JSONDecodeError as e:
        return f"Invalid JSON: {str(e)}", "Fix the JSON errors first"

    result = PlaythroughResult()

    # Get perspective info
    persp = PLAYTEST_PERSPECTIVES.get(perspective, PLAYTEST_PERSPECTIVES["default"])
    perspective_name = persp["name"]
    system_prompt = persp["system"]

    # Autoload model if not loaded
    if ml.model is None or ml.tokenizer is None:
        ml.load_model(model_name)

    # Move model to GPU; the throwaway CUDA tensor is used to obtain the device
    torch = _ensure_torch()
    zero = torch.Tensor([0]).cuda()
    ml.model.to(zero.device)

    # Initialize game session, starting at the first state of the first location
    try:
        config = sanitize_config_for_serialization(config)
        game_engine.all_states = config
        starting_location = next(iter(config.keys()))
        starting_state = next(iter(config[starting_location].keys()))
        game_session = GameSession(starting_location, starting_state)
    except Exception as e:
        return f"Failed to initialize game: {str(e)}", "Check your config format"

    # Get initial state
    description, choices, _ = game_session.get_current_state_info()

    # Play through the game
    step = 0
    visited_states = set()

    while step < max_steps and choices:
        current_location = game_session.current_location
        current_state_name = game_session.current_state
        state_key = f"{current_location}_{current_state_name}"

        # Track visited states
        if state_key in visited_states:
            result.issues_found.append(f"Step {step}: Revisited state '{state_key}' - possible loop")
        visited_states.add(state_key)
        result.states_visited.append(state_key)

        # Get available choices
        available_choices = [c for c in choices if c]
        if not available_choices:
            result.issues_found.append(f"Step {step}: No choices at '{state_key}' - dead end?")
            break

        # Build prompt and generate LLM response
        prompt = build_playtest_prompt(description, available_choices, game_session.game_state, perspective)
        response = _generate_response(system_prompt, prompt, zero.device)

        choice_idx, reasoning, issue = parse_llm_choice(response, len(available_choices))
        chosen_choice = available_choices[choice_idx]

        # Record perspective-specific issues
        if issue:
            result.issues_found.append(f"Step {step} [{perspective_name}]: {issue}")

        # Record this step
        result.steps.append(PlaythroughStep(
            step_number=step,
            location=current_location,
            state=current_state_name,
            description=_truncate(description, 200),
            available_choices=available_choices,
            choice_made=chosen_choice,
            reasoning=reasoning
        ))

        # Make the choice; the engine index is taken from the numeric prefix
        # of the choice label (text before the first '.').
        try:
            choice_number = int(chosen_choice.split('.')[0]) - 1
            description, choices, _ = game_session.make_choice(choice_number)
        except Exception as e:
            result.issues_found.append(f"Step {step}: Error making choice: {str(e)}")
            break

        step += 1

    # Final state
    gs = game_session.game_state
    result.final_state = {
        "location": gs.current_location,
        "state": gs.current_state,
        "inventory": list(gs.inventory),
        "money": gs.money,
        "flags": {k: v for k, v in gs.flags.items() if v}
    }

    # The game is complete once no choices remain, even if that happens on
    # the very last allowed step; hitting the cap only counts as a problem
    # when there was still something left to choose.
    result.completed = not choices
    if choices and step >= max_steps:
        result.issues_found.append(f"Reached max steps ({max_steps}) without completing")

    unique_states = len(set(result.states_visited))
    total_visits = len(result.states_visited)
    if total_visits > unique_states * 2:
        result.issues_found.append(f"High state repetition: {total_visits} visits to {unique_states} unique states")

    log = format_playthrough_log(result)
    log = f"=== Perspective: {perspective_name} ===\n\n" + log
    issues = format_issues(result)

    return log, issues