"""
LastBrain: Survive everything.
═══════════════════════════════════════════════════════════════
3-Stage MARL Mobile Pipeline for Small Models (≤4B) on T4 GPU.

A: Raw model output (single call)
B: LastBrain 3-stage pipeline (Hypothesis → Draft+Audit → Adversarial Refine)
Judge: GPT-5.4 scores both outputs 0-100.

Built by VIDRAFT — https://vidraft.net
"""

import json
import os
import time
import random
import torch
import gradio as gr
from dataclasses import dataclass, field
from typing import Callable, Dict, Optional
from threading import Lock
from transformers import AutoTokenizer, AutoModelForCausalLM

# ═══════════════════════════════════════════════════════════════
# LASTBRAIN CORE — 3-Stage Pipeline
# ═══════════════════════════════════════════════════════════════

@dataclass
class LastBrainConfig:
    """Settings for one LastBrain pipeline run.

    The per-stage ``*_budget`` / ``*_temp`` values are handed to the model
    call function as the budget and temperature arguments of the
    corresponding stage (S1, S2, S5).
    """
    # "emergence" makes the pipeline use `emergence_type` for seeds and the
    # mode label; every other value is handled as "insight".
    mode: str = "insight"
    # Seed family used in emergence mode; looked up as a key of MODE_SEEDS.
    emergence_type: str = "invent"
    # Budgets passed to the call function for each stage.
    s1_budget: int = 256
    s2_budget: int = 2048
    s5_budget: int = 2048
    # Temperatures passed to the call function for each stage.
    s1_temp: float = 0.7
    s2_temp: float = 0.5
    s5_temp: float = 0.4
    # When True, S2/S5 outputs that look cut off get one follow-up call
    # limited to `continue_budget`.
    auto_continue: bool = True
    continue_budget: int = 1024
    # presumably controls whether stage outputs are exposed via
    # LastBrainResult.trace — verify against LastBrain.run.
    include_trace: bool = True

@dataclass
class LastBrainResult:
    """Outcome of a single ``LastBrain.run`` call."""
    # Final text shown to the user: the S5 output, or the S2 draft when S5
    # returned nothing or an "[ERROR..." string.
    answer: str = ""
    # The S2 draft (including any continuation appended to it).
    raw_answer: str = ""
    # Stage name ("S1_Hypothesis", "S2_DraftAudit", "S5_AdversarialRefine")
    # mapped to that stage's output text.
    trace: Dict[str, str] = field(default_factory=dict)
    # Wall-clock seconds for the whole run.
    elapsed: float = 0.0
    # Wall-clock seconds per stage, keyed "S1", "S2", "S5".
    stages_elapsed: Dict[str, float] = field(default_factory=dict)

# System prompt for stage S1. LastBrain.run appends a "[MODE: ...]" line and
# the sampled "[SEEDS]" to it before calling the model.
S1_SYSTEM = """You are S1_Hypothesis — Trap & Angle Detector.
Your ONLY job: find hidden traps, contradictions, and the best angle of attack.
Output EXACTLY 3 bullets, nothing more:
(1) Core trap or hidden assumption
(2) Key contradiction or missing nuance
(3) Best angle to approach this correctly
[BUDGET: 80 words MAX]"""

# System prompt for stage S2: asks for a full draft annotated with [CHECK]
# and [GAPS] tags, which stage S5 is later told to remove.
S2_SYSTEM = """You are S2_DraftAuditor — Solver + Self-Checker.
Write a COMPLETE answer to the task.
RULES:
- After EVERY major claim, add [CHECK: brief self-verification]
- If you're uncertain, say so explicitly
- At the end, add [GAPS: anything you might have missed]
Be thorough but concise."""

# System prompt for stage S5: rewrites the S2 draft into the clean final
# answer that becomes LastBrainResult.answer.
S5_SYSTEM = """You are S5_AdversarialRefiner — Final Quality Gate.
You receive a draft answer with [CHECK] tags.
Your job:
1. Hunt for hallucination, overconfidence, logical errors, missing nuance
2. Fix ALL errors silently
3. Write a COMPLETE NEW final answer (not patches)
4. Remove all [CHECK] and [GAPS] tags — output clean text only
The user sees ONLY your output. Make it perfect."""

# Seed hints per mode. LastBrain._get_seeds samples up to two entries from the
# list selected by the active mode (falling back to "insight" for unknown
# keys) and injects them into the S1 system prompt.
MODE_SEEDS = {
    "insight": [
        "Look for common misconceptions that sound plausible",
        "Check if the question contains a hidden false premise",
        "Consider edge cases that change the answer completely",
        "Verify if popular beliefs contradict scientific evidence",
    ],
    "invent": [
        "[TRIZ] Segmentation: divide object into independent parts",
        "[TRIZ] Asymmetry: change symmetrical form to asymmetrical",
        "[BIO] Spider silk: tensile strength 5x steel at 1/6 density",
        "[CONTRADICTION] Strength vs Flexibility — resolve via hierarchy",
    ],
    "create": [
        "[TROPE] Chosen One -> The chosen one was chosen by mistake",
        "[PARADOX] Bootstrap: effect precedes its own cause",
        "[GENRE] Horror x Comedy -> terror played completely straight by funny people",
        "[SENSE] Synesthesia: what does the color of loneliness taste like?",
    ],
    "recipe": [
        "[FLAVOR] Umami x Acid -> fermented + citrus bridge",
        "[TEXTURE] Crispy outside x Molten inside -> temperature contrast",
        "[METHOD] Sous-vide precision x Wok-hei chaos -> controlled disorder",
    ],
    "pharma": [
        "[TARGET] Repurpose: existing approved drug -> new disease indication",
        "[MECHANISM] Checkpoint inhibitor logic -> apply to neurodegeneration",
        "[DELIVERY] Nanoparticle encapsulation -> cross blood-brain barrier",
    ],
    "genomics": [
        "[LETHALITY] Synthetic lethality: gene A + gene B knockout = selective kill",
        "[PATHWAY] Crosstalk: MAPK <-> PI3K interaction in resistance",
        "[PLATFORM] CRISPR base editing -> single nucleotide precision",
    ],
    "chemistry": [
        "[PROPERTY] Contradictory: hard + flexible simultaneously via gradient",
        "[SCALE] Nano-property -> macro-application via self-assembly",
        "[BIO] Nacre structure: brick-and-mortar -> 3000x toughness increase",
    ],
    "ecology": [
        "[TRANSFER] Island conservation success -> apply to urban fragment",
        "[INVERSION] Invasive threat -> commercial resource (lionfish -> sashimi)",
        "[STACK] Single intervention -> carbon + water + food + mental health",
    ],
    "law": [
        "[JURISDICTION] EU strict liability vs US negligence -> hybrid framework",
        "[COLLISION] AI-generated content x copyright law -> new doctrine needed",
        "[TRANSPLANT] Data privacy framework -> apply to genetic data",
    ],
    "document": [
        "[STRUCTURE] Argue BOTH sides before concluding",
        "[EVIDENCE] Every claim needs a verifiable source or explicit uncertainty",
        "[DILEMMA] Present the core tradeoff the reader must decide",
    ],
}


class LastBrain:
    """3-stage MARL pipeline: S1 Hypothesis -> S2 Draft+Audit -> S5 Adversarial Refine.

    ``call_fn`` is invoked positionally as
    ``call_fn(prompt, system, budget, temperature)`` and is expected to return
    the generated text. An output that is empty or starts with ``"[ERROR"`` is
    treated as a failed call throughout the pipeline.
    """

    def __init__(self, call_fn: Callable, config: Optional["LastBrainConfig"] = None):
        self.call_fn = call_fn
        self.config = config or LastBrainConfig()

    @staticmethod
    def _is_usable(text):
        """Return True when ``text`` is non-empty and not an "[ERROR..." marker."""
        return bool(text) and not text.startswith("[ERROR")

    def _active_mode(self):
        """Return the seed/mode key: the emergence type in emergence mode, else "insight"."""
        return self.config.emergence_type if self.config.mode == "emergence" else "insight"

    def _get_seeds(self, prompt):
        """Sample up to two seed hints for the active mode, newline-joined.

        ``prompt`` is accepted for interface compatibility and is not used.
        Unknown modes fall back to the "insight" seeds.
        """
        seeds = MODE_SEEDS.get(self._active_mode(), MODE_SEEDS["insight"])
        return "\n".join(random.sample(seeds, min(2, len(seeds))))

    def _detect_truncation(self, text):
        """Heuristically decide whether ``text`` stops mid-sentence.

        Texts shorter than 20 characters, or ending in a newline, are never
        considered truncated; otherwise the last non-whitespace character must
        be outside a small set of closing punctuation.
        """
        if not text or len(text) < 20:
            return False
        if text[-1] == '\n':
            return False
        t = text.rstrip()
        if len(t) < 10:
            return False
        return t[-1] not in '.!?)"\':\n'

    def _should_continue(self, text):
        """Return True when a follow-up "continue" call is warranted for ``text``.

        Error markers are excluded explicitly: a string such as
        "[ERROR] No model loaded" otherwise looks truncated and would trigger
        a pointless extra model call.
        """
        return bool(self.config.auto_continue
                    and self._is_usable(text)
                    and self._detect_truncation(text))

    def _call(self, prompt, system, budget, temp):
        """Invoke ``call_fn`` and normalise a ``None`` result to an empty string."""
        out = self.call_fn(prompt, system, budget, temp)
        return "" if out is None else out

    def _extend(self, text, header, footer, system, temp):
        """Append one continuation to ``text`` if it looks cut off.

        The continuation prompt shows the last 500 characters of ``text``
        between ``header`` and ``footer``. ``text`` is returned unchanged when
        no continuation is needed or the continuation call fails.
        """
        if not self._should_continue(text):
            return text
        cont = self._call(f"{header}\n---\n{text[-500:]}\n---\n{footer}",
                          system, self.config.continue_budget, temp)
        return f"{text}\n{cont}" if self._is_usable(cont) else text

    def run(self, prompt, system_context=""):
        """Run S1 -> S2 -> S5 on ``prompt`` and return a ``LastBrainResult``.

        Args:
            prompt: The user task.
            system_context: Optional context prepended to the task for S1/S2.

        Returns:
            LastBrainResult whose ``answer`` is the S5 output, or the S2 draft
            when S5 produced nothing usable. ``trace`` is empty when
            ``config.include_trace`` is False.
        """
        cfg = self.config
        start = time.time()
        trace, stages_elapsed = {}, {}
        full_prompt = f"[Context]\n{system_context}\n\n[Task]\n{prompt}" if system_context else prompt
        seeds = self._get_seeds(prompt)

        # S1: Hypothesis + Seeds
        t1 = time.time()
        s1_system = f"{S1_SYSTEM}\n\n[MODE: {self._active_mode().upper()}]\n[SEEDS]\n{seeds}"
        s1_out = self._call(full_prompt, s1_system, cfg.s1_budget, cfg.s1_temp)
        s1_ok = self._is_usable(s1_out)
        trace["S1_Hypothesis"] = s1_out
        stages_elapsed["S1"] = time.time() - t1

        # S2: Draft + Inline Audit (S1 analysis is only forwarded when usable)
        t2 = time.time()
        s2_ctx = f"[S1 ANALYSIS]\n{s1_out[:300]}\n\n" if s1_ok else ""
        s2_out = self._call(f"{s2_ctx}[TASK]\n{full_prompt}", S2_SYSTEM,
                            cfg.s2_budget, cfg.s2_temp)
        s2_out = self._extend(
            s2_out,
            "You were writing but got CUT OFF. Last part:",
            "CONTINUE from exactly where you stopped. Do NOT repeat.",
            "You are S2_DraftAuditor continuing. Be concise.",
            cfg.s2_temp)
        trace["S2_DraftAudit"] = s2_out
        stages_elapsed["S2"] = time.time() - t2

        # S5: Adversarial Refine
        t5 = time.time()
        s2_compressed = s2_out[:1500]
        if len(s2_out) > 1500:
            s2_compressed += "\n[... draft truncated — write your OWN complete version]"
        # NOTE(review): S5 receives the bare prompt, not system_context — confirm intended.
        # An S1 error marker is reported as 'none', matching the guard used for S2.
        s5_prompt = (f"[ORIGINAL TASK]\n{prompt}\n\n"
                     f"[S1 TRAPS]\n{s1_out[:200] if s1_ok else 'none'}\n\n"
                     f"[S2 DRAFT — reference only, write your OWN]\n{s2_compressed}")
        s5_out = self._call(s5_prompt, S5_SYSTEM, cfg.s5_budget, cfg.s5_temp)
        s5_out = self._extend(
            s5_out,
            "You were writing the FINAL ANSWER but got CUT OFF:",
            "CONTINUE from exactly where you stopped. Complete ALL remaining items.",
            "You are S5_AdversarialRefiner continuing. Clean final text only.",
            cfg.s5_temp)
        trace["S5_AdversarialRefine"] = s5_out
        stages_elapsed["S5"] = time.time() - t5

        answer = s5_out if self._is_usable(s5_out) else s2_out
        return LastBrainResult(answer=answer, raw_answer=s2_out,
                               trace=trace if cfg.include_trace else {},
                               elapsed=time.time() - start, stages_elapsed=stages_elapsed)


# ═══════════════════════════════════════════════════════════════
# MODELS
# ═══════════════════════════════════════════════════════════════
# Selectable models, keyed by the id passed to `from_pretrained`. The metadata
# is display-only: it is rendered in the model info card and status messages.
# NOTE(review): the origin of the "score" values is not shown in this file.
MODELS = {
    "google/gemma-3n-e4b-it": {
        "name": "Gemma-3n-E4B", "params": "4B (2B active)", "ram": "~2GB",
        "why": "Self-correction 89, Metacognition 90 — optimal for LastBrain",
        "score": 87.5, "badge": "Most Balanced",
    },
    "Qwen/Qwen3-4B": {
        "name": "Qwen3-4B", "params": "4B", "ram": "~2.8GB",
        "why": "Trap detection 100, Math 100, Coding 100 — strongest reasoning",
        "score": 86.9, "badge": "Best Reasoning",
    },
    "Qwen/Qwen3-1.7B": {
        "name": "Qwen3-1.7B", "params": "1.7B", "ram": "~1.2GB",
        "why": "Metacognition 90 at 1.7B — ultralight mobile champion",
        "score": 76.8, "badge": "Lightest",
    },
}

# Single process-wide slot for the currently loaded model and tokenizer;
# "id" is the MODELS key of what is loaded, or None.
loaded_model = {"id": None, "tokenizer": None, "model": None}
# Held by load_model while it swaps the contents of `loaded_model`.
model_lock = Lock()


# ═══════════════════════════════════════════════════════════════
# GPU MODEL LOADING
# ═══════════════════════════════════════════════════════════════
def load_model(model_id, progress=gr.Progress()):
    """Load ``model_id`` into the shared ``loaded_model`` slot, replacing any previous model.

    Args:
        model_id: A key of ``MODELS``.
        progress: Gradio progress tracker used to report loading steps.

    Returns:
        A human-readable status string (success, already loaded, or failure).
        Loading errors are reported in the string rather than raised.
    """
    import gc  # only needed when an old model has to be released

    if not model_id or model_id not in MODELS:
        return "Select a model first."
    display_name = MODELS[model_id]["name"]
    with model_lock:
        if loaded_model["id"] == model_id:
            return f"Already loaded: {display_name}"
        progress(0.1, desc="Clearing GPU memory...")
        if loaded_model["model"] is not None:
            # Reset the slots to None instead of deleting the keys: other code
            # reads loaded_model["model"] without holding the lock and would hit
            # a KeyError while a swap is in progress. Clearing "id" as well
            # avoids advertising a model that is no longer there.
            loaded_model.update({"id": None, "tokenizer": None, "model": None})
            # Collect the dropped objects first so empty_cache() can actually
            # return their cached blocks.
            gc.collect()
            torch.cuda.empty_cache()
        progress(0.3, desc=f"Loading {display_name}...")
        try:
            # NOTE(review): trust_remote_code=True executes code shipped with the
            # checkpoint; acceptable only because model_id is restricted to MODELS.
            tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
            mdl = AutoModelForCausalLM.from_pretrained(
                model_id, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
            loaded_model.update({"id": model_id, "tokenizer": tok, "model": mdl})
            vram = torch.cuda.memory_allocated() / 1024**3
            progress(1.0, desc="Done!")
            return f"{display_name} loaded | VRAM: {vram:.1f}GB"
        except Exception as e:
            # Leave the slot in a consistent "nothing loaded" state on failure.
            loaded_model.update({"id": None, "tokenizer": None, "model": None})
            return f"Failed: {str(e)[:200]}"


# ═══════════════════════════════════════════════════════════════
# LOCAL INFERENCE
# ═══════════════════════════════════════════════════════════════
def local_generate(prompt, system="", max_tokens=2048, temperature=0.5):
    """Generate a reply from the currently loaded model.

    Args:
        prompt: User message.
        system: Optional system message.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (clamped to at least 0.01).

    Returns:
        The decoded completion with the prompt tokens removed, or a string
        starting with "[ERROR]" when no model is loaded or generation fails.
        Failures are returned, not raised, because the pipeline detects failed
        calls by that prefix.
    """
    # Snapshot both handles once so a concurrent load_model() cannot change
    # them between the check and the use.
    tok, mdl = loaded_model.get("tokenizer"), loaded_model.get("model")
    if mdl is None or tok is None:
        return "[ERROR] No model loaded"

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    try:
        text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Best-effort fallback when the chat template cannot be applied:
        # use plain concatenation.
        text = f"{system}\n\n{prompt}" if system else prompt

    try:
        inputs = tok(text, return_tensors="pt", truncation=True, max_length=4096).to(mdl.device)
        with torch.no_grad():
            outputs = mdl.generate(**inputs, max_new_tokens=max_tokens,
                                   temperature=max(temperature, 0.01),
                                   do_sample=True, top_p=0.9, repetition_penalty=1.1,
                                   pad_token_id=tok.eos_token_id)
        # generate() returns prompt + completion; keep only the new tokens.
        prompt_len = inputs["input_ids"].shape[1]
        return tok.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    except Exception as e:
        # e.g. CUDA out-of-memory: report through the "[ERROR" protocol so the
        # pipeline can fall back instead of the whole request crashing.
        return f"[ERROR] Generation failed: {str(e)[:200]}"


# ═══════════════════════════════════════════════════════════════
# GPT-5.4 JUDGE
# ═══════════════════════════════════════════════════════════════
# Prompt template for the judge model. Filled with str.format using the
# fields {question}, {response_a} and {response_b}; the doubled braces in the
# last line are literal braces of the requested JSON shape.
JUDGE_PROMPT = """You are an expert AI evaluator. Score two AI responses to the same question.

QUESTION:
{question}

RESPONSE A (Raw model, single call):
{response_a}

RESPONSE B (LastBrain, 3-stage pipeline):
{response_b}

Score each response on these criteria (0-100):
1. Accuracy: Are facts correct? Any hallucination?
2. Completeness: Are all aspects addressed?
3. Self-awareness: Does it acknowledge uncertainty when appropriate?
4. Reasoning depth: Is reasoning thorough and multi-layered?

Respond ONLY with JSON:
{{"score_a":{{"accuracy":N,"completeness":N,"self_awareness":N,"reasoning":N,"total":N}},"score_b":{{"accuracy":N,"completeness":N,"self_awareness":N,"reasoning":N,"total":N}},"winner":"A" or "B" or "TIE","reason":"one sentence"}}"""

def judge_with_gpt(question, response_a, response_b, api_key):
    """Ask the OpenAI judge model to score two responses to ``question``.

    Args:
        question: The original prompt (first 1000 characters are sent).
        response_a: Raw model answer (first 2000 characters are sent).
        response_b: LastBrain answer (first 2000 characters are sent).
        api_key: OpenAI API key; judging is skipped when empty.

    Returns:
        The judge's verdict as a dict, or ``{"error": message}`` on any
        failure (missing key, network/API error, unparsable reply). Never raises
        for request or parsing problems.
    """
    if not api_key:
        return {"error": "OpenAI API key required for judging"}
    import requests
    try:
        r = requests.post("https://api.openai.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            json={"model": "gpt-5.4", "max_completion_tokens": 500, "temperature": 0,
                  "messages": [{"role": "user", "content": JUDGE_PROMPT.format(
                      question=question[:1000], response_a=response_a[:2000], response_b=response_b[:2000])}]},
            timeout=30)
        payload = r.json()

        # Surface the API's own error message instead of a bare KeyError
        # ("'choices'") when the request was rejected.
        if not isinstance(payload, dict) or not payload.get("choices"):
            api_err = payload.get("error") if isinstance(payload, dict) else None
            detail = api_err.get("message") if isinstance(api_err, dict) else api_err
            return {"error": str(detail or f"HTTP {r.status_code}: no choices in response")[:200]}

        content = payload["choices"][0]["message"].get("content") or ""
        text = content.strip().replace("```json", "").replace("```", "").strip()
        if not text:
            return {"error": "Judge returned an empty reply"}
        try:
            verdict = json.loads(text)
        except json.JSONDecodeError:
            # Tolerate prose around the JSON: retry on the outermost {...} span.
            first, last = text.find("{"), text.rfind("}")
            if first == -1 or last <= first:
                raise
            verdict = json.loads(text[first:last + 1])

        # Callers use dict methods on the verdict; reject any other JSON type.
        if not isinstance(verdict, dict):
            return {"error": "Judge reply was not a JSON object"}
        return verdict
    except Exception as e:
        return {"error": str(e)[:200]}


# ═══════════════════════════════════════════════════════════════
# A/B TEST
# ═══════════════════════════════════════════════════════════════
def run_ab_test(prompt, mode, etype, api_key, progress=gr.Progress()):
    """Run the raw model and the LastBrain pipeline on ``prompt`` and render both.

    This is a generator used as a Gradio event handler. It yields 5-tuples of
    HTML/text for (status, raw output, LastBrain output, trace, judge):
    first the generation results with an empty judge panel, then the same
    results with the judge verdict (or a hint to supply an API key).

    Args:
        prompt: User prompt.
        mode: "Emergence" selects emergence mode; anything else is insight mode.
        etype: Emergence engine name; defaults to "invent" when empty.
        api_key: OpenAI API key; judging is skipped when empty.
        progress: Gradio progress tracker.
    """
    from html import escape

    def esc(value):
        """HTML-escape model or judge supplied content before embedding it.

        Without this, output containing '<' (code, math, <think> tags) is
        parsed as markup and is hidden or breaks the layout.
        """
        return escape(str(value))

    # .get(): tolerate the key being temporarily absent during a model swap.
    if loaded_model.get("model") is None:
        yield "Load a model first.", "", "", "", ""
        return
    if not prompt or not prompt.strip():
        yield "Enter a prompt first.", "", "", "", ""
        return
    mn = MODELS.get(loaded_model["id"], {}).get("name", "?")

    # Raw
    progress(0.1, desc="[A] Raw generating...")
    t0 = time.time()
    raw = local_generate(prompt, "Answer thoroughly and accurately.", 2048, 0.5)
    t_raw = time.time() - t0

    # LastBrain
    progress(0.4, desc="[B] LastBrain S1...")
    cfg = LastBrainConfig(mode="emergence" if mode == "Emergence" else "insight",
                          emergence_type=etype.lower() if etype else "invent")
    lb = LastBrain(local_generate, cfg)
    result = lb.run(prompt)
    t_lb = result.elapsed

    se = result.stages_elapsed
    # Slowdown factor of the pipeline relative to the raw call (guarded against /0).
    ratio = t_lb / max(t_raw, 0.1)
    status = f"""<div style="padding:12px;background:linear-gradient(135deg,#0f172a,#1e293b);border-radius:10px;color:#f1f5f9;font-family:'JetBrains Mono',monospace;font-size:12px">
<span style="color:#22d3ee;font-weight:700">LastBrain</span> <span style="color:#64748b">|</span> {mn}
<span style="color:#64748b">|</span> Raw: <span style="color:#fbbf24">{t_raw:.1f}s</span>
<span style="color:#64748b">|</span> LastBrain: <span style="color:#22d3ee">{t_lb:.1f}s</span>
<span style="color:#64748b">|</span> S1: {se.get('S1',0):.1f}s S2: {se.get('S2',0):.1f}s S5: {se.get('S5',0):.1f}s
<span style="color:#64748b">|</span> x{ratio:.1f}</div>"""

    raw_html = f"""<div style="padding:16px;background:#fff;border-radius:10px;border:1px solid #e2e5f0">
<div style="font-size:11px;color:#94a3b8;margin-bottom:8px;font-family:'JetBrains Mono',monospace">
RAW {mn} | {t_raw:.1f}s | {len(raw.split())} words</div>
<div style="white-space:pre-wrap;font-size:13px;line-height:1.7">{esc(raw)}</div></div>"""

    marl_html = f"""<div style="padding:16px;background:#fff;border-radius:10px;border:2px solid #22d3ee">
<div style="font-size:11px;color:#22d3ee;margin-bottom:8px;font-family:'JetBrains Mono',monospace">
LASTBRAIN {mn} | {t_lb:.1f}s | {len(result.answer.split())} words</div>
<div style="white-space:pre-wrap;font-size:13px;line-height:1.7">{esc(result.answer)}</div></div>"""

    trace_parts = []
    for stage, text in result.trace.items():
        # Trace keys look like "S1_Hypothesis"; timings are keyed by the "S1" prefix.
        ts = se.get(stage.split("_")[0], 0)
        trace_parts.append(f"""<details style="margin:4px 0"><summary style="cursor:pointer;font-weight:600;font-size:12px;padding:6px;background:#f8f9fc;border-radius:6px">
{esc(stage)} ({ts:.1f}s)</summary>
<pre style="white-space:pre-wrap;font-size:11px;padding:10px;background:#0f172a;color:#e2e8f0;border-radius:6px;max-height:300px;overflow-y:auto;margin-top:4px">{esc(text)}</pre></details>""")
    trace_html = "\n".join(trace_parts)

    # First update: show both answers immediately, before the (slow) judge call.
    yield status, raw_html, marl_html, trace_html, ""

    # Judge
    if api_key:
        progress(0.8, desc="[Judge] GPT-5.4 evaluating...")
        v = judge_with_gpt(prompt, raw, result.answer, api_key)
        if not isinstance(v, dict):
            v = {"error": "Judge returned an unexpected reply"}
        if "error" in v:
            judge_html = f"<div style='color:#e11d48;padding:12px'>Judge error: {esc(v['error'])}</div>"
        else:
            sa, sb = v.get("score_a", {}), v.get("score_b", {})
            # The judge is an LLM; do not trust the shape of its JSON.
            if not isinstance(sa, dict):
                sa = {}
            if not isinstance(sb, dict):
                sb = {}
            w = v.get("winner", "?")
            reason = v.get("reason", "")
            wc = "#94a3b8" if w == "A" else "#22d3ee" if w == "B" else "#fbbf24"
            wl = f"Raw {mn}" if w == "A" else "LastBrain" if w == "B" else "TIE"
            wi = "A" if w == "A" else "B" if w == "B" else "="
            judge_html = f"""<div style="padding:20px;background:linear-gradient(135deg,#0f172a,#1e293b);border-radius:12px;border:2px solid {wc}">
<div style="text-align:center;font-size:24px;font-weight:800;color:{wc};margin-bottom:16px;font-family:'JetBrains Mono',monospace;letter-spacing:1px">
WINNER: {wl} [{wi}]</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:16px">
<div style="padding:14px;background:rgba(255,255,255,0.05);border-radius:8px;border:1px solid #334155">
<div style="font-weight:700;color:#94a3b8;margin-bottom:8px;font-size:13px">A — Raw {mn}: {esc(sa.get('total',0))}/100</div>
<div style="font-size:11px;color:#64748b;line-height:1.6">
Accuracy: {esc(sa.get('accuracy',0))} | Completeness: {esc(sa.get('completeness',0))}<br>
Self-awareness: {esc(sa.get('self_awareness',0))} | Reasoning: {esc(sa.get('reasoning',0))}</div></div>
<div style="padding:14px;background:rgba(34,211,238,0.08);border-radius:8px;border:1px solid #22d3ee">
<div style="font-weight:700;color:#22d3ee;margin-bottom:8px;font-size:13px">B — LastBrain: {esc(sb.get('total',0))}/100</div>
<div style="font-size:11px;color:#64748b;line-height:1.6">
Accuracy: {esc(sb.get('accuracy',0))} | Completeness: {esc(sb.get('completeness',0))}<br>
Self-awareness: {esc(sb.get('self_awareness',0))} | Reasoning: {esc(sb.get('reasoning',0))}</div></div></div>
<div style="text-align:center;font-size:11px;color:#64748b">{esc(reason)}</div>
<div style="text-align:center;font-size:10px;color:#475569;margin-top:8px;font-family:'JetBrains Mono',monospace">
Raw: {t_raw:.1f}s | LastBrain: {t_lb:.1f}s (x{ratio:.1f}) | Judge: GPT-5.4</div></div>"""
        yield status, raw_html, marl_html, trace_html, judge_html
    else:
        yield status, raw_html, marl_html, trace_html, "<div style='padding:12px;color:#fbbf24;background:#0f172a;border-radius:8px;font-size:12px'>Enter OpenAI API key to enable GPT-5.4 judging.</div>"


# ═══════════════════════════════════════════════════════════════
# EXAMPLES
# ═══════════════════════════════════════════════════════════════
# Example rows for gr.Examples. Each row is [prompt, mode, engine], matching
# the (prompt, mode, etype) input components of the UI.
EXAMPLES = [
    ["Is 0.9999... less than 1? Prove your answer.", "Insight", "invent"],
    ["A startup claims 99.9% cancer detection from a selfie. Evaluate this claim.", "Insight", "invent"],
    ["Can you replicate Korean bulgogi taste with only plant-based ingredients? Explain the chemistry.", "Emergence", "recipe"],
    ["Identify ONE existing drug and build a case for repositioning it to treat Alzheimer's.", "Emergence", "pharma"],
    ["Is it physically possible to combine graphene-level strength with rubber-level flexibility?", "Emergence", "chemistry"],
    ["A self-driving car kills a pedestrian. Design a liability framework resolving EU, US, and Korean law.", "Emergence", "law"],
    ["Write ONE movie logline that would make both A24 and Marvel want to bid.", "Emergence", "create"],
    ["An island nation is sinking. $10M budget. Sea walls, coral, or relocation? 50-year analysis.", "Emergence", "ecology"],
]


# ═══════════════════════════════════════════════════════════════
# UI
# ═══════════════════════════════════════════════════════════════
# Build the Gradio UI. Components are created in display order; event handlers
# are attached right after the components they connect.
with gr.Blocks(
    title="LastBrain: Survive everything.",
    theme=gr.themes.Base(primary_hue="cyan", neutral_hue="slate"),
    css="""
    .gradio-container { max-width: 1200px !important; background: #0f172a !important; }
    .main { background: #0f172a !important; }
    body { background: #0f172a !important; }
    .dark { background: #0f172a !important; }
    footer { display: none !important; }
    """
) as app:

    # Header: static title banner.
    gr.HTML("""<div style="text-align:center;padding:32px 0 16px">
<h1 style="font-size:42px;font-weight:900;margin:0;letter-spacing:-1px;
background:linear-gradient(135deg,#22d3ee,#6366f1,#a855f7);-webkit-background-clip:text;-webkit-text-fill-color:transparent">
LastBrain</h1>
<p style="color:#22d3ee;font-size:14px;font-weight:600;letter-spacing:4px;text-transform:uppercase;margin:4px 0">
Survive everything.</p>
<p style="color:#475569;font-size:11px;margin:8px 0;font-family:'JetBrains Mono',monospace">
3-Stage Metacognitive Pipeline for Small Models | Raw vs LastBrain | Judged by GPT-5.4</p>
</div>""")

    # Model selection: dropdown of MODELS keys, a load button, a status line
    # and an info card for the selected model.
    gr.HTML("<div style='color:#64748b;font-size:10px;text-transform:uppercase;letter-spacing:2px;padding:0 4px;margin-bottom:4px;font-weight:700'>Select Model</div>")
    with gr.Row():
        with gr.Column(scale=3):
            model_dd = gr.Dropdown(choices=list(MODELS.keys()), label="Model", container=False)
        with gr.Column(scale=1):
            load_btn = gr.Button("Load on GPU", variant="primary")
    load_status = gr.Textbox(label="Status", interactive=False, lines=1)
    model_info = gr.HTML("")

    def show_info(mid):
        """Render the info card HTML for model id ``mid``; empty string if unknown."""
        if not mid or mid not in MODELS: return ""
        m = MODELS[mid]
        return f"""<div style="padding:10px 14px;background:#1e293b;border-radius:8px;border-left:3px solid #22d3ee;margin:4px 0;font-size:12px;color:#cbd5e1">
<b style="color:#22d3ee">{m['name']}</b>
<span style="color:#475569">|</span> {m['params']}
<span style="color:#475569">|</span> RAM: {m['ram']}
<span style="color:#475569">|</span> Score: <b style="color:#fbbf24">{m['score']}</b>
<span style="color:#475569">|</span> <span style="background:#22d3ee;color:#0f172a;padding:1px 6px;border-radius:4px;font-size:10px;font-weight:700">{m['badge']}</span>
<br><span style="color:#64748b">{m['why']}</span></div>"""

    # Selecting a model refreshes the info card; the button triggers the load.
    model_dd.change(fn=show_info, inputs=[model_dd], outputs=[model_info])
    load_btn.click(fn=load_model, inputs=[model_dd], outputs=[load_status])

    # Prompt input plus mode / engine selectors.
    gr.HTML("<hr style='margin:16px 0;border-color:#1e293b'>")
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Prompt", placeholder="Ask anything...", lines=3)
        with gr.Column(scale=1):
            mode = gr.Radio(["Insight", "Emergence"], value="Insight", label="Mode")
            etype = gr.Dropdown(
                ["invent","create","recipe","pharma","genomics","chemistry","ecology","law","document"],
                value="invent", label="Engine", visible=False)
            # The engine dropdown is only shown while "Emergence" is selected.
            mode.change(fn=lambda m: gr.Dropdown(visible=m=="Emergence"), inputs=[mode], outputs=[etype])

    # NOTE(review): the field is pre-filled from the server's OPENAI_API_KEY
    # environment variable; confirm that exposing it as a component value to
    # every visitor is intended.
    api_key = gr.Textbox(label="OpenAI API Key (GPT-5.4 Judge)", type="password",
                         placeholder="sk-... (recommended)", value=os.getenv("OPENAI_API_KEY",""))
    run_btn = gr.Button("Run A/B Test", variant="primary", size="lg")

    gr.Examples(examples=EXAMPLES, inputs=[prompt, mode, etype], label="Examples")

    # Output: status bar, side-by-side answers, judge panel, collapsible trace.
    status_out = gr.HTML()
    with gr.Row():
        with gr.Column():
            gr.HTML("<div style='text-align:center;font-weight:700;color:#94a3b8;font-size:13px;padding:8px'>A — Raw Model</div>")
            raw_out = gr.HTML()
        with gr.Column():
            gr.HTML("<div style='text-align:center;font-weight:700;color:#22d3ee;font-size:13px;padding:8px'>B — LastBrain</div>")
            marl_out = gr.HTML()

    judge_out = gr.HTML()

    with gr.Accordion("Reasoning Trace (S1 → S2 → S5)", open=False):
        trace_out = gr.HTML()

    # run_ab_test is a generator; the output order here must match the order
    # of the 5-tuples it yields.
    run_btn.click(fn=run_ab_test, inputs=[prompt, mode, etype, api_key],
                  outputs=[status_out, raw_out, marl_out, trace_out, judge_out])

    # Footer. The token budgets in the text are hard-coded (the f-string has no
    # placeholders); they currently equal the LastBrainConfig defaults.
    gr.HTML(f"""<div style="text-align:center;padding:20px;margin-top:16px">
<span style="color:#22d3ee;font-weight:800;font-size:14px;letter-spacing:2px">LastBrain</span>
<span style="color:#475569;font-size:11px"> — Survive everything.</span><br>
<span style="color:#334155;font-size:10px;font-family:'JetBrains Mono',monospace">
S1 Hypothesis (256t) → S2 Draft+Audit (2048t) → S5 Adversarial Refine (2048t)<br>
<a href="https://vidraft.net" style="color:#475569">VIDRAFT</a> ·
<a href="https://github.com/Vidraft/MARL" style="color:#475569">GitHub</a> ·
<a href="https://pypi.org/project/marl-middleware/" style="color:#475569">PyPI</a> ·
<a href="https://clawhub.ai/Cutechicken99/marl-middleware" style="color:#475569">ClawHub</a>
</span></div>""")

# Script entry point: serve the app on port 7860, listening on all network
# interfaces (0.0.0.0) rather than localhost only.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)