"""
LastBrain: Survive everything.
═══════════════════════════════════════════════════════════════
3-Stage MARL Mobile Pipeline for Small Models (≤4B) on T4 GPU.

A: Raw model output (single call)
B: LastBrain 3-stage pipeline (Hypothesis → Draft+Audit → Adversarial Refine)
Judge: GPT-5.4 scores both outputs 0-100.

Built by VIDRAFT — https://vidraft.net
"""
import gc
import json
import os
import time
import random
import torch
import gradio as gr
from dataclasses import dataclass, field
from typing import Callable, Dict, Optional
from threading import Lock
from transformers import AutoTokenizer, AutoModelForCausalLM

# ═══════════════════════════════════════════════════════════════
# LASTBRAIN CORE — 3-Stage Pipeline
# ═══════════════════════════════════════════════════════════════


@dataclass
class LastBrainConfig:
    """Tunable settings for one LastBrain pipeline run."""

    # "emergence" enables the domain seeds selected by `emergence_type`;
    # any other value falls back to the "insight" seeds.
    mode: str = "insight"
    emergence_type: str = "invent"
    # Per-stage generation budgets, passed to call_fn as its third argument.
    s1_budget: int = 256
    s2_budget: int = 2048
    s5_budget: int = 2048
    # Per-stage sampling temperatures, passed to call_fn as its fourth argument.
    s1_temp: float = 0.7
    s2_temp: float = 0.5
    s5_temp: float = 0.4
    # When True, a stage output that looks cut off gets one follow-up call
    # limited to `continue_budget`.
    auto_continue: bool = True
    continue_budget: int = 1024
    # NOTE(review): not consulted by LastBrain.run in the visible code; the
    # trace is always populated.
    include_trace: bool = True


@dataclass
class LastBrainResult:
    """Outcome of LastBrain.run: final answer, S2 draft, trace and timings."""

    answer: str = ""
    raw_answer: str = ""
    trace: Dict[str, str] = field(default_factory=dict)
    elapsed: float = 0.0
    stages_elapsed: Dict[str, float] = field(default_factory=dict)


S1_SYSTEM = """You are S1_Hypothesis — Trap & Angle Detector.
Your ONLY job: find hidden traps, contradictions, and the best angle of attack.
Output EXACTLY 3 bullets, nothing more:
(1) Core trap or hidden assumption
(2) Key contradiction or missing nuance
(3) Best angle to approach this correctly
[BUDGET: 80 words MAX]"""

S2_SYSTEM = """You are S2_DraftAuditor — Solver + Self-Checker.
Write a COMPLETE answer to the task.
RULES:
- After EVERY major claim, add [CHECK: brief self-verification]
- If you're uncertain, say so explicitly
- At the end, add [GAPS: anything you might have missed]
Be thorough but concise."""

S5_SYSTEM = """You are S5_AdversarialRefiner — Final Quality Gate.
You receive a draft answer with [CHECK] tags. Your job:
1. Hunt for hallucination, overconfidence, logical errors, missing nuance
2. Fix ALL errors silently
3. Write a COMPLETE NEW final answer (not patches)
4. Remove all [CHECK] and [GAPS] tags — output clean text only
The user sees ONLY your output. Make it perfect."""

MODE_SEEDS = {
    "insight": [
        "Look for common misconceptions that sound plausible",
        "Check if the question contains a hidden false premise",
        "Consider edge cases that change the answer completely",
        "Verify if popular beliefs contradict scientific evidence",
    ],
    "invent": [
        "[TRIZ] Segmentation: divide object into independent parts",
        "[TRIZ] Asymmetry: change symmetrical form to asymmetrical",
        "[BIO] Spider silk: tensile strength 5x steel at 1/6 density",
        "[CONTRADICTION] Strength vs Flexibility — resolve via hierarchy",
    ],
    "create": [
        "[TROPE] Chosen One -> The chosen one was chosen by mistake",
        "[PARADOX] Bootstrap: effect precedes its own cause",
        "[GENRE] Horror x Comedy -> terror played completely straight by funny people",
        "[SENSE] Synesthesia: what does the color of loneliness taste like?",
    ],
    "recipe": [
        "[FLAVOR] Umami x Acid -> fermented + citrus bridge",
        "[TEXTURE] Crispy outside x Molten inside -> temperature contrast",
        "[METHOD] Sous-vide precision x Wok-hei chaos -> controlled disorder",
    ],
    "pharma": [
        "[TARGET] Repurpose: existing approved drug -> new disease indication",
        "[MECHANISM] Checkpoint inhibitor logic -> apply to neurodegeneration",
        "[DELIVERY] Nanoparticle encapsulation -> cross blood-brain barrier",
    ],
    "genomics": [
        "[LETHALITY] Synthetic lethality: gene A + gene B knockout = selective kill",
        "[PATHWAY] Crosstalk: MAPK <-> PI3K interaction in resistance",
        "[PLATFORM] CRISPR base editing -> single nucleotide precision",
    ],
    "chemistry": [
        "[PROPERTY] Contradictory: hard + flexible simultaneously via gradient",
        "[SCALE] Nano-property -> macro-application via self-assembly",
        "[BIO] Nacre structure: brick-and-mortar -> 3000x toughness increase",
    ],
    "ecology": [
        "[TRANSFER] Island conservation success -> apply to urban fragment",
        "[INVERSION] Invasive threat -> commercial resource (lionfish -> sashimi)",
        "[STACK] Single intervention -> carbon + water + food + mental health",
    ],
    "law": [
        "[JURISDICTION] EU strict liability vs US negligence -> hybrid framework",
        "[COLLISION] AI-generated content x copyright law -> new doctrine needed",
        "[TRANSPLANT] Data privacy framework -> apply to genetic data",
    ],
    "document": [
        "[STRUCTURE] Argue BOTH sides before concluding",
        "[EVIDENCE] Every claim needs a verifiable source or explicit uncertainty",
        "[DILEMMA] Present the core tradeoff the reader must decide",
    ],
}

# Prefix that marks a call_fn return value as a failure rather than content.
_ERROR_PREFIX = "[ERROR"


def _is_error(text) -> bool:
    """Return True when *text* is a non-empty string carrying the error prefix."""
    return bool(text) and text.startswith(_ERROR_PREFIX)


class LastBrain:
    """3-stage MARL pipeline: S1 Hypothesis -> S2 Draft+Audit -> S5 Adversarial Refine

    `call_fn` is invoked as ``call_fn(prompt, system, budget, temperature)``
    and must return the generated text, or a string starting with "[ERROR"
    on failure.
    """

    def __init__(self, call_fn: Callable, config: Optional[LastBrainConfig] = None):
        self.call_fn = call_fn
        self.config = config or LastBrainConfig()

    def _get_seeds(self, prompt):
        """Return up to two randomly sampled seed lines for the active mode.

        `prompt` is accepted for interface stability but is not used.
        """
        if self.config.mode == "emergence":
            mode = self.config.emergence_type
        else:
            mode = "insight"
        # Unknown emergence types fall back to the "insight" seeds.
        seeds = MODE_SEEDS.get(mode, MODE_SEEDS["insight"])
        return "\n".join(random.sample(seeds, min(2, len(seeds))))

    def _detect_truncation(self, text):
        """Heuristically decide whether *text* stops mid-sentence.

        Returns False for empty/short text or text ending in a newline;
        otherwise True when the last non-whitespace character is not
        closing punctuation.
        """
        if not text or len(text) < 20:
            return False
        if text[-1] == '\n':
            return False
        t = text.rstrip()
        if len(t) < 10:
            return False
        return t[-1] not in '.!?)"\':\n'

    def _auto_continue(self, text, header, instruction, system, temperature):
        """Ask the model to resume *text* once if it looks cut off.

        Error outputs are never continued: an "[ERROR ..." string ends in a
        letter and would otherwise be mistaken for a truncated answer,
        costing a pointless extra model call.
        """
        if not self.config.auto_continue or _is_error(text):
            return text
        if not self._detect_truncation(text):
            return text
        cont = self.call_fn(
            f"{header}\n---\n{text[-500:]}\n---\n{instruction}",
            system, self.config.continue_budget, temperature)
        if cont and not _is_error(cont):
            return text + "\n" + cont
        return text

    def run(self, prompt, system_context=""):
        """Run S1 -> S2 -> S5 on *prompt* and return a LastBrainResult.

        Args:
            prompt: The user task.
            system_context: Optional context; when non-empty it is prepended
                to the task for every stage.

        Returns:
            LastBrainResult whose `answer` is the S5 output, or the S2 draft
            when S5 is empty or an error; `raw_answer` is always the S2 draft.
        """
        cfg = self.config
        start = time.time()
        trace, stages_elapsed = {}, {}
        full_prompt = f"[Context]\n{system_context}\n\n[Task]\n{prompt}" if system_context else prompt
        seeds = self._get_seeds(prompt)

        # S1: Hypothesis + Seeds
        t1 = time.time()
        mode_label = cfg.emergence_type.upper() if cfg.mode == "emergence" else "INSIGHT"
        s1_out = self.call_fn(
            full_prompt,
            f"{S1_SYSTEM}\n\n[MODE: {mode_label}]\n[SEEDS]\n{seeds}",
            cfg.s1_budget, cfg.s1_temp)
        trace["S1_Hypothesis"] = s1_out
        stages_elapsed["S1"] = time.time() - t1
        # A failed or empty S1 must not be presented to later stages as analysis.
        s1_usable = bool(s1_out) and not _is_error(s1_out)

        # S2: Draft + Inline Audit
        t2 = time.time()
        s2_ctx = f"[S1 ANALYSIS]\n{s1_out[:300]}\n\n" if s1_usable else ""
        s2_out = self.call_fn(f"{s2_ctx}[TASK]\n{full_prompt}", S2_SYSTEM,
                              cfg.s2_budget, cfg.s2_temp)
        s2_out = self._auto_continue(
            s2_out,
            "You were writing but got CUT OFF. Last part:",
            "CONTINUE from exactly where you stopped. Do NOT repeat.",
            "You are S2_DraftAuditor continuing. Be concise.",
            cfg.s2_temp)
        trace["S2_DraftAudit"] = s2_out
        stages_elapsed["S2"] = time.time() - t2

        # S5: Adversarial Refine
        t5 = time.time()
        s2_compressed = s2_out[:1500]
        if len(s2_out) > 1500:
            s2_compressed += "\n[... draft truncated — write your OWN complete version]"
        # Use full_prompt so the refiner sees the same context as S1/S2;
        # it is identical to `prompt` when no system_context was given.
        s5_prompt = (f"[ORIGINAL TASK]\n{full_prompt}\n\n"
                     f"[S1 TRAPS]\n{s1_out[:200] if s1_usable else 'none'}\n\n"
                     f"[S2 DRAFT — reference only, write your OWN]\n{s2_compressed}")
        s5_out = self.call_fn(s5_prompt, S5_SYSTEM, cfg.s5_budget, cfg.s5_temp)
        s5_out = self._auto_continue(
            s5_out,
            "You were writing the FINAL ANSWER but got CUT OFF:",
            "CONTINUE from exactly where you stopped. Complete ALL remaining items.",
            "You are S5_AdversarialRefiner continuing. Clean final text only.",
            cfg.s5_temp)
        trace["S5_AdversarialRefine"] = s5_out
        stages_elapsed["S5"] = time.time() - t5

        # Fall back to the S2 draft when the refiner produced nothing usable.
        answer = s5_out if s5_out and not _is_error(s5_out) else s2_out
        return LastBrainResult(answer=answer, raw_answer=s2_out, trace=trace,
                               elapsed=time.time() - start,
                               stages_elapsed=stages_elapsed)


# ═══════════════════════════════════════════════════════════════
# MODELS
# ═══════════════════════════════════════════════════════════════
MODELS = {
    "google/gemma-3n-e4b-it": {
        "name": "Gemma-3n-E4B", "params": "4B (2B active)", "ram": "~2GB",
        "why": "Self-correction 89, Metacognition 90 — optimal for LastBrain",
        "score": 87.5, "badge": "Most Balanced",
    },
    "Qwen/Qwen3-4B": {
        "name": "Qwen3-4B", "params": "4B", "ram": "~2.8GB",
        "why": "Trap detection 100, Math 100, Coding 100 — strongest reasoning",
        "score": 86.9, "badge": "Best Reasoning",
    },
    "Qwen/Qwen3-1.7B": {
        "name": "Qwen3-1.7B", "params": "1.7B", "ram": "~1.2GB",
        "why": "Metacognition 90 at 1.7B — ultralight mobile champion",
        "score": 76.8, "badge": "Lightest",
    },
}

# Single shared slot for the currently loaded model; load_model swaps it
# while holding model_lock.
loaded_model = {"id": None, "tokenizer": None, "model": None}
model_lock = Lock()


# ═══════════════════════════════════════════════════════════════
# GPU MODEL LOADING
# ═══════════════════════════════════════════════════════════════
def load_model(model_id, progress=gr.Progress()):
    """Load *model_id* into the shared `loaded_model` slot.

    Args:
        model_id: A key of MODELS.
        progress: Gradio progress tracker used for status updates.

    Returns:
        A human-readable status string (success, already loaded, or failure).
    """
    if not model_id or model_id not in MODELS:
        return "Select a model first."
    with model_lock:
        if loaded_model["id"] == model_id:
            return f"Already loaded: {MODELS[model_id]['name']}"
        progress(0.1, desc="Clearing GPU memory...")
        if loaded_model.get("model") is not None:
            # Reset the slots to None instead of deleting the keys: readers
            # such as local_generate do not take the lock and must never see
            # a dict with missing keys or a stale id while the swap is running.
            loaded_model.update({"id": None, "tokenizer": None, "model": None})
            gc.collect()
            torch.cuda.empty_cache()
        progress(0.3, desc=f"Loading {MODELS[model_id]['name']}...")
        try:
            tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
            mdl = AutoModelForCausalLM.from_pretrained(
                model_id, torch_dtype=torch.float16, device_map="auto",
                trust_remote_code=True)
            loaded_model.update({"id": model_id, "tokenizer": tok, "model": mdl})
            vram = torch.cuda.memory_allocated() / 1024**3
            progress(1.0, desc="Done!")
            return f"{MODELS[model_id]['name']} loaded | VRAM: {vram:.1f}GB"
        except Exception as e:
            # UI boundary: report the failure as text and leave the slot empty.
            loaded_model.update({"id": None, "tokenizer": None, "model": None})
            return f"Failed: {str(e)[:200]}"


# ═══════════════════════════════════════════════════════════════
# LOCAL INFERENCE
# ═══════════════════════════════════════════════════════════════
def local_generate(prompt, system="", max_tokens=2048, temperature=0.5):
    """Generate a completion with the currently loaded model.

    Args:
        prompt: User message.
        system: Optional system message.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.

    Returns:
        The decoded completion with the prompt tokens removed, or
        "[ERROR] No model loaded" when no model is available.
    """
    # Snapshot both references once so a concurrent load_model cannot leave
    # this call with a tokenizer/model pair that changes mid-way.
    tok, mdl = loaded_model.get("tokenizer"), loaded_model.get("model")
    if mdl is None:
        return "[ERROR] No model loaded"
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    try:
        text = tok.apply_chat_template(messages, tokenize=False,
                                       add_generation_prompt=True)
    except Exception:
        # The chat template was unusable for these messages; fall back to
        # plain concatenation.
        text = f"{system}\n\n{prompt}" if system else prompt
    inputs = tok(text, return_tensors="pt", truncation=True,
                 max_length=4096).to(mdl.device)
    with torch.no_grad():
        outputs = mdl.generate(**inputs, max_new_tokens=max_tokens,
                               temperature=max(temperature, 0.01),
                               do_sample=True, top_p=0.9,
                               repetition_penalty=1.1,
                               pad_token_id=tok.eos_token_id)
    # Drop the echoed prompt tokens and decode only the continuation.
    prompt_len = inputs["input_ids"].shape[1]
    return tok.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()


# ═══════════════════════════════════════════════════════════════
# GPT-5.4 JUDGE
#
═══════════════════════════════════════════════════════════════ JUDGE_PROMPT = """You are an expert AI evaluator. Score two AI responses to the same question. QUESTION: {question} RESPONSE A (Raw model, single call): {response_a} RESPONSE B (LastBrain, 3-stage pipeline): {response_b} Score each response on these criteria (0-100): 1. Accuracy: Are facts correct? Any hallucination? 2. Completeness: Are all aspects addressed? 3. Self-awareness: Does it acknowledge uncertainty when appropriate? 4. Reasoning depth: Is reasoning thorough and multi-layered? Respond ONLY with JSON: {{"score_a":{{"accuracy":N,"completeness":N,"self_awareness":N,"reasoning":N,"total":N}},"score_b":{{"accuracy":N,"completeness":N,"self_awareness":N,"reasoning":N,"total":N}},"winner":"A" or "B" or "TIE","reason":"one sentence"}}""" def judge_with_gpt(question, response_a, response_b, api_key): if not api_key: return {"error": "OpenAI API key required for judging"} import requests try: r = requests.post("https://api.openai.com/v1/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json={"model": "gpt-5.4", "max_completion_tokens": 500, "temperature": 0, "messages": [{"role": "user", "content": JUDGE_PROMPT.format( question=question[:1000], response_a=response_a[:2000], response_b=response_b[:2000])}]}, timeout=30) text = r.json()["choices"][0]["message"]["content"].strip().replace("```json","").replace("```","").strip() return json.loads(text) except Exception as e: return {"error": str(e)[:200]} # ═══════════════════════════════════════════════════════════════ # A/B TEST # ═══════════════════════════════════════════════════════════════ def run_ab_test(prompt, mode, etype, api_key, progress=gr.Progress()): if loaded_model["model"] is None: yield "Load a model first.", "", "", "", "" return mn = MODELS.get(loaded_model["id"], {}).get("name", "?") # Raw progress(0.1, desc="[A] Raw generating...") t0 = time.time() raw = local_generate(prompt, 
"Answer thoroughly and accurately.", 2048, 0.5) t_raw = time.time() - t0 # LastBrain progress(0.4, desc="[B] LastBrain S1...") cfg = LastBrainConfig(mode="emergence" if mode == "Emergence" else "insight", emergence_type=etype.lower() if etype else "invent") lb = LastBrain(local_generate, cfg) result = lb.run(prompt) t_lb = result.elapsed se = result.stages_elapsed status = f"""
{text}Survive everything.
3-Stage Metacognitive Pipeline for Small Models | Raw vs LastBrain | Judged by GPT-5.4