{ "architectures": [ "CerberusDetForObjectDetection" ], "auto_map": { "AutoConfig": "configuration_cerberus.CerberusDetConfig", "AutoModel": "modeling_cerberus.CerberusDetForObjectDetection", "AutoImageProcessor": "cerberus_image_processor.CerberusDetImageProcessor" }, "init_device": "cpu", "model_type": "cerberus_v8x", "torch_dtype": "float16", "transformers_version": "4.50.0", "verbose": 0, "initializer_range": 0.02, "stride": [ 8, 16, 32], "agnostic_nms": false, "conf_thres": 0.3, "iou_thres": 0.45, "iou_thres_between_tasks": 0.8, "tasks_names": { "voc": [ "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor" ], "objects365_animals": [ "Monkey", "Rabbit", "Yak", "Antelope", "Pig", "Bear", "Deer", "Giraffe", "Zebra", "Elephant", "Lion", "Donkey", "Camel", "Jellyfish", "Other Fish", "Dolphin", "Crab", "Seal", "Goldfish" ] }, "tasks_nc": [ 20, 19 ], "config_name": "voc_obj365_animals.yaml", "cfg": { "depth_multiple": 1.0, "width_multiple": 1.25, "backbone": [ [ -1, 1, "Conv", [ 64, 3, 2 ] ], [ -1, 1, "Conv", [ 128, 3, 2 ] ], [ -1, 3, "C2f", [ 128, true ] ], [ -1, 1, "Conv", [ 256, 3, 2 ] ], [ -1, 6, "C2f", [ 256, true ] ], [ -1, 1, "Conv", [ 512, 3, 2 ] ], [ -1, 6, "C2f", [ 512, true ] ], [ -1, 1, "Conv", [ 512, 3, 2 ] ], [ -1, 3, "C2f", [ 512, true ] ], [ -1, 1, "SPPF", [ 512, 5 ] ] ], "neck": [ [ 9, 1, "nn.Upsample", [ "None", 2, "nearest" ] ], [ [ -1, 6 ], 1, "Concat", [ 1 ] ], [ -1, 3, "C2f", [ 512 ] ], [ -1, 1, "nn.Upsample", [ "None", 2, "nearest" ] ], [ [ -1, 4 ], 1, "Concat", [ 1 ] ], [ -1, 3, "C2f", [ 256 ] ], [ -1, 1, "Conv", [ 256, 3, 2 ] ], [ [ -1, 12 ], 1, "Concat", [ 1 ] ], [ -1, 3, "C2f", [ 512 ] ], [ -1, 1, "Conv", [ 512, 3, 2 ] ], [ [ -1, 9 ], 1, "Concat", [ 1 ] ], [ -1, 3, "C2f", [ 512 ] ] ], "head": [ [ [ 15, 18, 21 ], 1, "Detect", [] ] ], "cerber": [ [ 2, [ [ 13 ], [ 14 ] ] ] ], "ch": 3, "nc": [ 20, 19 ] } }