"""Load a fixed list of safety / harmfulness datasets from the Hugging Face Hub.

For every dataset (and every configuration of multi-config datasets) the
script prints the available splits and one example row, then prints a summary
of the row counts of everything that was loaded successfully.
"""

import os

from dotenv import load_dotenv
from huggingface_hub import login

# Read variables from a local .env file (if any) into the process environment,
# then authenticate against the Hub when a token is available.
load_dotenv()
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    login(hf_token)

# NOTE(review): this import intentionally stays below load_dotenv()/login(),
# as in the original statement order. Presumably `datasets` reads some
# environment settings at import time -- confirm before moving it to the top.
from datasets import load_dataset  # noqa: E402

# Datasets that are loaded one configuration at a time, mapped to the
# configurations to fetch. Any dataset not listed here is loaded without an
# explicit configuration.
_MULTI_CONFIG_DATASETS = {
    "ai-safety-institute/AgentHarm": ["harmless_benign", "harmful", "chat"],
}


def _print_first_example(ds, prefix=""):
    """Print row 0 of the first split of *ds*, if there is one.

    Args:
        ds: Mapping of split name to dataset, as returned by ``load_dataset``.
        prefix: Text inserted before the split name in the printed header
            (used to show the configuration name).
    """
    # Only the first split is previewed, to keep the output short.
    split = next(iter(ds.keys()), None)
    # Guard against a missing or empty split: indexing row 0 would raise and
    # the caller would then report the (successful) load as a failure.
    if split is None or len(ds[split]) == 0:
        return
    print(f" ➤ Exemple de {prefix}{split}:\n", ds[split][0])


def _load_one(name, config, loaded_datasets):
    """Load one dataset (optionally one configuration) and report on it.

    On success the dataset is stored in *loaded_datasets* under ``name`` (no
    configuration) or ``f"{name}_{config}"``. Any exception is caught and
    printed so that one failing dataset does not stop the whole run.

    Args:
        name: Hub repository id of the dataset.
        config: Configuration name, or ``None`` to load without one.
        loaded_datasets: Dict that receives the loaded dataset (mutated).
    """
    try:
        if config is None:
            ds = load_dataset(name)
            loaded_datasets[name] = ds
            print(f" 📊 Splits disponibles: {list(ds.keys())}")
            _print_first_example(ds)
        else:
            ds = load_dataset(name, config)
            loaded_datasets[f"{name}_{config}"] = ds
            print(f" 📊 Config {config} - Splits disponibles: {list(ds.keys())}")
            _print_first_example(ds, prefix=f"{config} ")
    except Exception as e:  # best-effort script: report and move on
        if config is None:
            print(f"❌ Échec du chargement {name}: {e}")
        else:
            print(f"❌ Échec du chargement {name} config {config}: {e}")


def main():
    """Load every target dataset, preview it, and print a final summary."""
    # ----------------------------------------------------------------
    # 1) Names of the target datasets
    # ----------------------------------------------------------------
    # Known datasets related to safety / harmfulness / jailbreaks.
    dataset_names = [
        # Benchmark of malicious LLM-agent behaviours.
        "ai-safety-institute/AgentHarm",
        # Harmful-instruction benchmark (probably requires accepting an
        # agreement / licence).
        "LLM-Tuning-Safety/HEx-PHI",
        # (If available) sociopolitical benchmark of LLM vulnerabilities.
        "psyonp/SocialHarmBench",
        # Helpful & Harmless (not explicitly safety, but includes refusals);
        # unverified description.
        "Anthropic/hh-rlhf",
        # Assumed to be a dataset of questions meant to elicit dangerous
        # answers; unverified description.
        "declare-lab/HarmfulQA",
        # Assumed to be a dataset about sycophancy / flattering behaviour;
        # unverified description.
        "lasrprobegen/sycophancy-activations",
        "walledai/AdvBench",
    ]

    # ----------------------------------------------------------------
    # 2) Load each dataset
    # ----------------------------------------------------------------
    loaded_datasets = {}
    for name in dataset_names:
        print(f"➡️ Chargement du dataset: {name}")
        # [None] means "load once, without an explicit configuration".
        for config in _MULTI_CONFIG_DATASETS.get(name, [None]):
            _load_one(name, config, loaded_datasets)

    # ----------------------------------------------------------------
    # 3) Summary of the loaded datasets
    # ----------------------------------------------------------------
    print("\n✅ Résumé des jeux de données chargés:")
    for name, ds in loaded_datasets.items():
        try:
            print(f"- {name}:")
            for split in ds.keys():
                print(f" * {split}: {ds[split].num_rows} exemples")
        except Exception as e:  # keep summarising the remaining datasets
            print(f" ❌ Erreur lors du résumé: {e}")

    print("\n➡️ Fin du script.")


if __name__ == "__main__":
    main()