Skip to content

Commit f7356f4

Browse files
authored
[Feature] Support FCOS3D head (open-mmlab#442)
* Support base mono3d dense head and anchor free mono3d head
* Support FCOS3D head
* Support FCOS3D baseline on nuScenes
* Fix an import error caused by update of mmcv/mmdet
* Change img_scale to scale_factor in the MultiScaleFlipAug in the config
* Add pred_bbox2d in the params of anchor_free_mono3d_head
* Add unit test for fcos3d head
* Fix a minor bug when setting img_metas in the unit test
* Add unit test for fcos3d detector
* Simplify the logic of weights initialization
* Add comments to specify the reason of cloning features
* Update head config
1 parent a0090aa commit f7356f4

9 files changed

Lines changed: 1902 additions & 4 deletions

File tree

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
# Dataset settings for camera-only (monocular) 3D detection on nuScenes.
# Defines the dataset type/root, the class list, the train/test pipelines and
# the train/val/test dataset entries consumed through the `data` dict.
dataset_type = 'NuScenesMonoDataset'
data_root = 'data/nuscenes/'
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
# Input modality for nuScenes dataset, this is consistent with the submission
# format which requires the information in input_modality.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)
# Per-channel mean/std normalisation with conversion to RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: load image and 2D/3D annotations (including attribute
# labels and box depths), resize, random horizontal flip, normalise, pad,
# then bundle and collect the keys listed below.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=True,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers2d', 'depths'
        ]),
]
# Test pipeline: single scale (scale_factor=1.0), flipping disabled; only the
# image is collected.
test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='MultiScaleFlipAug',
        scale_factor=1.0,
        flip=False,
        transforms=[
            dict(type='RandomFlip3D'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        box_type_3d='Camera'),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        box_type_3d='Camera'),
    # NOTE: `test` points at the same val annotation file as `val`.
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        box_type_3d='Camera'))
evaluation = dict(interval=2)

configs/_base_/models/fcos3d.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
# FCOS3D model definition: a caffe-style ResNet-101 backbone, an FPN neck and
# an FCOSMono3DHead, plus the train/test settings passed to the detector.
model = dict(
    type='FCOSMono3D',
    pretrained='open-mmlab://detectron2/resnet101_caffe',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        extra_convs_on_inputs=False,  # use P5
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSMono3DHead',
        num_classes=10,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        use_direction_classifier=True,
        diff_rad_by_sin=True,
        pred_attrs=True,
        pred_velo=True,
        dir_offset=0.7854,  # pi/4
        strides=[8, 16, 32, 64, 128],
        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
        cls_branch=(256, ),
        # One entry per regression group, in the same order as
        # group_reg_dims; the velocity group has an empty branch.
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            ()  # velo
        ),
        dir_branch=(256, ),
        attr_branch=(256, ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_attr=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        norm_on_bbox=True,
        centerness_on_reg=True,
        center_sampling=True,
        conv_bias=True,
        dcn_on_last_conv=True),
    train_cfg=dict(
        allowed_border=0,
        # Nine weights, equal to sum(group_reg_dims); presumably one per
        # regressed dimension -- confirm against the head implementation.
        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.8,
        score_thr=0.05,
        min_bbox_size=0,
        max_per_img=200))
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
# FCOS3D experiment config: inherits the nuScenes mono3d dataset, the FCOS3D
# model, the 1x schedule and the default runtime, then overrides the pieces
# defined below.
_base_ = [
    '../_base_/datasets/nus-mono3d.py', '../_base_/models/fcos3d.py',
    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
# model settings
# Enable DCNv2 in the last two backbone stages.
model = dict(
    backbone=dict(
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, False, True, True)))

class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
# Mean subtraction only (std = 1) and no RGB conversion; presumably chosen to
# match the caffe-style pretrained backbone -- confirm. The pipelines are
# redefined below so that they use this normalisation.
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=True,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='MultiScaleFlipAug',
        scale_factor=1.0,
        flip=False,
        transforms=[
            dict(type='RandomFlip3D'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['img']),
        ])
]
# Only the pipelines are overridden per split; the remaining dataset fields
# are expected to come from the inherited dataset config.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
optimizer = dict(
    lr=0.002, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
# _delete_=True replaces the inherited optimizer_config rather than merging
# into it, so gradient clipping is the only setting left.
optimizer_config = dict(
    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
total_epochs = 12
evaluation = dict(interval=2)
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from .anchor3d_head import Anchor3DHead
2+
from .anchor_free_mono3d_head import AnchorFreeMono3DHead
23
from .base_conv_bbox_head import BaseConvBboxHead
4+
from .base_mono3d_dense_head import BaseMono3DDenseHead
35
from .centerpoint_head import CenterHead
6+
from .fcos_mono3d_head import FCOSMono3DHead
47
from .free_anchor3d_head import FreeAnchor3DHead
58
from .parta2_rpn_head import PartA2RPNHead
69
from .shape_aware_head import ShapeAwareHead
@@ -9,5 +12,6 @@
912

1013
__all__ = [
1114
'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
12-
'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead'
15+
'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
16+
'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead'
1317
]

0 commit comments

Comments
 (0)