{
  "source_model": "bknyaz/Qwen3-235B-A22B-Instruct-2507-REAP",
  "quantization": "NVFP4",
  "quant_algo": "NVFP4",
  "quant_method": "modelopt",
  "tool": "nvidia-modelopt",
  "tool_version": "0.39.0",
  "config": "NVFP4_DEFAULT_CFG",
  "format": "packed_fp4_safetensors",
  "block_size": 16,
  "weight_bits": 4,
  "weight_type": "float",
  "scale_type": "E4M3 (FP8) micro-block + FP32 tensor-level",
  "calibration": {
    "samples": 256,
    "tokens": 122858,
    "sources": ["GSM8K (math, 128 samples)", "CNN DailyMail (general, 128 samples)"],
    "max_length": 2048
  },
  "quantizers_inserted": 82629,
  "excluded_layers": ["lm_head", "all MoE gate layers"],
  "model_size_gb": 102,
  "source_size_gb": 350,
  "compression_ratio": 3.4,
  "hardware": {
    "quantization_gpus": "8x NVIDIA H100 80GB HBM3",
    "quantization_time_min": 78.8,
    "export_tool_version": "modelopt 0.39.0"
  },
  "benchmarks": {
    "hardware": "NVIDIA B200",
    "gsm8k_cot_8shot": {"bf16": 0.9007, "nvfp4": 0.8961, "delta": -0.0046},
    "gpqa_diamond_0shot": {"bf16": 0.4192, "nvfp4": 0.3939, "delta": -0.0253},
    "ifeval_inst_loose": {"bf16": 0.7278, "nvfp4": 0.7146, "delta": -0.0132}
  }
}