CerberusDet
Collection
Official models for CerberusDet: A unified, multi-headed YOLO framework for efficient multi-dataset object detection. • 3 items • Updated
• 2
📜 Paper on arXiv | GitHub | 🤗 All CerberusDet Models
CerberusDet is a unified multi-dataset object detection framework based on the YOLO architecture. This specific model checkpoint was trained simultaneously on two datasets: PASCAL VOC and the Animals subset of Objects365.
It demonstrates the ability to handle multiple domains with conflicting class definitions within a single model, achieving state-of-the-art performance with reduced inference time compared to running separate models.
Inference Speed: 7.2 ms on NVIDIA V100 (FP16, batch 32). This is faster than running two separate YOLOv8 models sequentially (approx. 11.2 ms).
Requirements:
pip install huggingface_hub git+https://github.com/ai-forever/CerberusDet transformers==4.50.0 accelerate==1.8.1
from transformers import AutoImageProcessor, AutoConfig, AutoModel
import cv2
import torch
# Example: run CerberusDet through the Hugging Face `transformers` Auto* classes
# and print every detection found in a single image.

# Hub repository that hosts the checkpoint, config and remote code.
model_name = "iitolstykh/cerberusdet-yolov8x-voc-o365-animals"
# Path to the input image; replace with your own file.
img_path = "path/to/image.jpg"

# Use the first CUDA device with FP16 when available, otherwise CPU with FP32.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0") if cuda_available else torch.device("cpu")
torch_dtype = torch.float16 if cuda_available else torch.float32

config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
cerberus_model = AutoModel.from_pretrained(
    model_name, trust_remote_code=True, config=config, torch_dtype=torch_dtype,
).to(device)
# NOTE(review): on a plain torch.nn.Module, `.half` is a method rather than a
# flag — confirm the remote model class exposes a boolean attribute here.
image_processor = AutoImageProcessor.from_pretrained(
    model_name, trust_remote_code=True, half=cerberus_model.half, stride=config.stride,
)

image = cv2.imread(img_path)  # BGR
# cv2.imread signals an unreadable/missing file by returning None instead of raising.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
inputs = image_processor(images=[image], device=device)

# inference
output = cerberus_model(**inputs, return_dict=True)

# print results
# The output holds one entry per input image in each of the four fields.
per_image_results = zip(output.boxes, output.scores, output.labels, output.tasks_ids)
for i, (boxes, scores, labels, task_indices) in enumerate(per_image_results):
    print(f"\n--- Results for Image {i} ---")
    detections = zip(boxes, scores, labels, task_indices)
    for j, (box_t, score_t, label_t, task_t) in enumerate(detections):
        score = score_t.item()
        label_id = int(label_t.item())
        task_idx = int(task_t.item())
        # Names are looked up in the config by the predicted indices.
        class_name = config.all_class_names[label_id]
        task_name = config.task_ids[task_idx]
        box = box_t.tolist()  # [x1, y1, x2, y2]
        print(f"Object {j}:")
        print(f" • Class: {class_name} (ID: {label_id})")
        print(f" • Task: {task_name}")
        print(f" • Conf: {score:.4f}")
        print(f" • Box: {box}")
Alternatively, run inference with the native CerberusDet package. Requirements:
pip install git+https://github.com/ai-forever/CerberusDet
import cv2
from cerberusdet.cerberusdet_inference import CerberusDetInference, CerberusVisualizer
from cerberusdet.cerberusdet_preprocessor import CerberusPreprocessor
from huggingface_hub import hf_hub_download
import torch
# Example: run CerberusDet with the native `cerberusdet` inference API,
# draw the detections on the image, save the result and display it.

# Path to the input image; replace with your own file.
img_path = "path/to/image.jpg"

# 1. Download model weights
model_path = hf_hub_download(
    repo_id="iitolstykh/cerberusdet-yolov8x-voc-o365-animals",
    filename="voc_obj365_animals_v8x_best.pt",
    repo_type="model",
)

# 2. Initialize the inference engine, preprocessor and visualizer
# Fall back to CPU without FP16 when no CUDA device is present; with CUDA the
# settings are the same as before ('cuda:0', half=True).
use_cuda = torch.cuda.is_available()
device = 'cuda:0' if use_cuda else 'cpu'
inferencer = CerberusDetInference(
    weights=model_path,
    device=device,
    conf_thres=0.3,
    iou_thres=0.45,
    half=use_cuda,
)
# Note: Pass the model's stride to the preprocessor
preprocessor = CerberusPreprocessor(
    img_size=640,
    stride=inferencer.stride,
    half=inferencer.half,
    auto=True,
)
visualizer = CerberusVisualizer(line_thickness=2, text_scale=0.5)

# 3. Load images
# The preprocessor expects a list of numpy arrays (BGR)
image = cv2.imread(img_path)
# cv2.imread signals an unreadable/missing file by returning None instead of raising.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
images = [image]
original_shapes = [img.shape[:2] for img in images]

# 4. Run inference
img_tensor = preprocessor.preprocess(images, device=inferencer.device)
detections = inferencer.predict(img_tensor, original_shape=original_shapes)

# 5. Visualization
res_image = visualizer.draw_detections(
    images[0],
    detections[0],
    hide_task=False,  # Show task name (VOC, O365, etc.)
    hide_conf=False,  # Show confidence score
)

# 6. Output / Save results
print(f"Found objects: {len(detections[0])}")
for det in detections[0]:
    print(f"{det['label_name']} ({det['score']:.2f}) - Task: {det['task']}")
# Write the file before opening a window so the result is kept even when no
# display is available and cv2.imshow fails.
cv2.imwrite("result.jpg", res_image)
cv2.imshow("CerberusDet Result", res_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
If you use this model in your research, please cite our paper:
@article{cerberusdet,
Author = {Irina Tolstykh and Michael Chernyshov and Maksim Kuprashevich},
Title = {CerberusDet: Unified Multi-Dataset Object Detection},
Year = {2024},
Eprint = {arXiv:2407.12632},
}