{ "source_model": "bknyaz/Qwen3-235B-A22B-Instruct-2507-REAP", "quantization": "NVFP4", "quant_algo": "NVFP4", "quant_method": "modelopt", "tool": "nvidia-modelopt", "tool_version": "0.39.0", "config": "NVFP4_DEFAULT_CFG", "format": "packed_fp4_safetensors", "block_size": 16, "weight_bits": 4, "weight_type": "float", "scale_type": "E4M3 (FP8) micro-block + FP32 tensor-level", "calibration": { "samples": 256, "tokens": 122858, "sources": ["GSM8K (math, 128 samples)", "CNN DailyMail (general, 128 samples)"], "max_length": 2048 }, "quantizers_inserted": 82629, "excluded_layers": ["lm_head", "all MoE gate layers"], "model_size_gb": 102, "source_size_gb": 350, "compression_ratio": 3.4, "hardware": { "quantization_gpus": "8x NVIDIA H100 80GB HBM3", "quantization_time_min": 78.8, "export_tool_version": "modelopt 0.39.0" }, "benchmarks": { "hardware": "NVIDIA B200", "gsm8k_cot_8shot": {"bf16": 0.9007, "nvfp4": 0.8961, "delta": -0.0046}, "gpqa_diamond_0shot": {"bf16": 0.4192, "nvfp4": 0.3939, "delta": -0.0253}, "ifeval_inst_loose": {"bf16": 0.7278, "nvfp4": 0.7146, "delta": -0.0132} } }