Image-Text-to-Text
Transformers
Safetensors
PyTorch
English
clip-fusion
feature-extraction
multimodal
hate-speech-detection
content-moderation
clip
vision-language
image-text
classification
multi-label-classification
social-media
meme-classification
text-image
late-fusion
gated-attention
custom_code
Eval Results (legacy)
| { | |
| "model_type": "clip-fusion", | |
| "architectures": ["MultiModalFusionClassifier"], | |
| "encoder_name": "openai/clip-vit-base-patch32", | |
| "backend": "clip", | |
| "fusion_dim": 512, | |
| "num_labels": 5, | |
| "class_names": ["racist", "sexist", "homophobe", "religion", "otherhate"], | |
| "thresholds": [0.35, 0.7, 0.75, 0.3, 0.6], | |
| "problem_type": "multi_label_classification", | |
| "id2label": { | |
| "0": "racist", | |
| "1": "sexist", | |
| "2": "homophobe", | |
| "3": "religion", | |
| "4": "otherhate" | |
| }, | |
| "label2id": { | |
| "racist": 0, | |
| "sexist": 1, | |
| "homophobe": 2, | |
| "religion": 3, | |
| "otherhate": 4 | |
| } | |
| } | |