import copy
import math
import os
import cv2
import horizon_plugin_pytorch as horizon
import numpy as np
import torch
from hbdk.torch_script.placeholder import placeholder
from horizon_plugin_pytorch.march import March
from PIL import Image
from torch import Tensor
from torchvision.transforms.functional import pil_to_tensor
from torchvision.transforms.functional_tensor import resize
import hat.data.datasets.nuscenes_dataset as NuscenesDataset
from hat.data.collates.nusc_collates import collate_nuscenes
from hat.metrics.mean_iou import MeanIOU
from hat.utils.config import ConfigVersion
VERSION = ConfigVersion.v2
training_step = os.environ.get("HAT_TRAINING_STEP", "float")
task_name = "bev_lss_efficientnetb0_multitask_nuscenes"
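# HAT_TRAINING_STEP picks the pipeline stage ("float", "calibration", "qat",
# "int_infer"); assuming the stock HAT tools layout, a config like this is
# typically run as: python3 tools/train.py --stage float --config <this file>.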
refine_levels = 4
maxdisp = 192
batch_size_per_gpu = 4
device_ids = [0, 1, 2, 3]
dataloader_workers = batch_size_per_gpu # per gpu
ckpt_dir = "./tmp_models/%s" % task_name
cudnn_benchmark = True
seed = None
log_rank_zero_only = True
march = March.BAYES
convert_mode = "fx"
enable_amp = False
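# March.BAYES targets the Bayes BPU architecture (Journey 5 series);
# convert_mode="fx" selects torch.fx graph-based quantization preparation in
# horizon_plugin_pytorch rather than eager module swapping.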
orig_shape = (3, 900, 1600)
resize_shape = (3, 396, 704)
data_shape = (3, 256, 704)
val_data_shape = (3, 256, 704)
bn_kwargs = dict(eps=2e-5, momentum=0.1)
vt_input_hw = (
    16,
    44,
)  # view transformer input shape for generating reference points.
weight_decay = 0.01
start_lr = 2e-4
train_epochs = 30
bev_size = (51.2, 51.2, 0.8)
grid_size = (128, 128)
map_size = (15, 30, 0.15)
task_map_size = (15, 30, 0.15)
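# Sanity check (a sketch, not part of the original config): bev_size is
# (x_range_m, y_range_m, cell_m), so the +/-51.2 m square at 0.8 m per cell
# matches the 128 x 128 grid_size above, and the +/-15 m x +/-30 m map region
# at 0.15 m per cell corresponds to a 200 x 400 map raster.
assert round(bev_size[0] * 2 / bev_size[2]) == grid_size[0] == 128
assert round(map_size[0] * 2 / map_size[2]) == 200
assert round(map_size[1] * 2 / map_size[2]) == 400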
qat_lr = 2e-5
qat_train_epochs = 10
data_rootdir = "./tmp_data/nuscenes/v1.0-trainval/"
meta_rootdir = "./tmp_data/nuscenes/meta"
seg_classes_name = ["others", "divider", "ped_crossing", "Boundary"]
use_bce = False
if use_bce:
    seg_classes = 3
else:
    seg_classes = 3 + 1
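# "others" is the background class: with BCE (per-class sigmoid) only the
# three foreground map classes get an output channel; with softmax
# cross-entropy a fourth channel is kept for background.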
depth = 60
num_points = 10
# model
def get_grid_quant_scale(grid_shape, view_shape):
    max_coord = max(*grid_shape, *view_shape)
    coord_bit_num = math.ceil(math.log(max_coord + 1, 2))
    coord_shift = 15 - coord_bit_num
    coord_shift = max(min(coord_shift, 8), 0)
    grid_quant_scale = 1.0 / (1 << coord_shift)
    return grid_quant_scale
view_shape = [data_shape[1] / 16, data_shape[2] / 16]
featview_shape = [view_shape[0] * 6, view_shape[1]]
grid_quant_scale = get_grid_quant_scale(grid_size, featview_shape)
depthview_shape = [6 * depth, view_shape[0] * view_shape[1]]
depth_quant_scale = get_grid_quant_scale(grid_size, depthview_shape)
map_shape = [
    int(task_map_size[1] * 2 / task_map_size[2]),
    int(task_map_size[0] * 2 / task_map_size[2]),
]
map_grid_quant_scale = get_grid_quant_scale(map_shape, view_shape)
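# Worked example (a sketch, not part of the original config): coordinates for
# the grid-sample ops appear to be kept in int16 fixed point, with 15
# magnitude bits split between coord_bit_num integer bits and coord_shift
# fractional bits, so the scale is 2 ** -coord_shift. Here max_coord is 128
# for the BEV grid (shift 7), 704 for the depth view (shift 5) and 400 for
# the map grid (shift 6).
assert grid_quant_scale == 1.0 / 128
assert depth_quant_scale == 1.0 / 32
assert map_grid_quant_scale == 1.0 / 64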
tasks = [
    dict(name="car", num_class=1, class_names=["car"]),
    dict(
        name="truck",
        num_class=2,
        class_names=["truck", "construction_vehicle"],
    ),
    dict(name="bus", num_class=2, class_names=["bus", "trailer"]),
    dict(name="barrier", num_class=1, class_names=["barrier"]),
    dict(name="bicycle", num_class=2, class_names=["motorcycle", "bicycle"]),
    dict(
        name="pedestrian",
        num_class=2,
        class_names=["pedestrian", "traffic_cone"],
    ),
]
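# The ten nuScenes detection classes are grouped into six CenterPoint-style
# heads (the num_class values sum to 10), so related categories such as
# "truck" and "construction_vehicle" share a heatmap.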
model = dict(
    type="ViewFusion",
    bev_feat_index=-1,
    bev_upscale=2,
    backbone=dict(
        type="efficientnet",
        bn_kwargs=bn_kwargs,
        model_type="b0",
        num_classes=1000,
        include_top=False,
        activation="relu",
        use_se_block=False,
    ),
    neck=dict(
        type="FastSCNNNeck",
        in_channels=[112, 320],
        feat_channels=[64, 64],
        indexes=[-2, -1],
        bn_kwargs=bn_kwargs,
        scale_factor=2,
    ),
    stereoNetHeadPlus=dict(
        type="StereoNetHead",
        maxdisp=maxdisp,
        bn_kwargs=bn_kwargs,
        refine_levels=refine_levels,
    ),
    stereoNetPostProcessPlus=dict(
        type="StereoNetPostProcess",
        maxdisp=maxdisp,
    ),
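    # Lift-Splat-Shoot style view transformer: features from the six camera
    # views are lifted along `depth` (60) discretized depth bins and splatted
    # onto the 128 x 128 BEV grid covering z_range.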
    view_transformer=dict(
        type="LSSTransformer",
        in_channels=64,
        feat_channels=64,
        z_range=(-10.0, 10.0),
        depth=depth,
        num_points=num_points,
        bev_size=bev_size,
        grid_size=grid_size,
        num_views=6,
        grid_quant_scale=grid_quant_scale,
        depth_grid_quant_scale=depth_quant_scale,
    ),
    bev_transforms=[
        dict(
            type="BevFeatureRotate",
            bev_size=bev_size,
            rot=(-0.3925, 0.3925),
        ),
    ],
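    # BEV-space augmentation: random rotation of the BEV feature map within
    # roughly +/- pi / 8 rad (0.3925 ~= pi / 8 ~= 0.3927).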
    bev_encoder=dict(
        type="BevEncoder",
        backbone=dict(
            type="efficientnet",
            bn_kwargs=bn_kwargs,
            model_type="b0",
            num_classes=1000,
            include_top=False,
            activation="relu",
            use_se_block=False,
            input_channels=64,
            quant_input=False,
        ),
        neck=dict(
            type="BiFPN",
            in_strides=[2, 4, 8, 16, 32],
            out_strides=[2, 4, 8, 16, 32],
            stride2channels={2: 16, 4: 24, 8: 40, 16: 112, 32: 320},
            out_channels=48,
            num_outs=5,
            stack=3,
            start_level=0,
            end_level=-1,
            fpn_name="bifpn_sum",
            upsample_type="function",
            use_fx=True,
        ),
    ),
    bev_decoders=[
        dict(
            type="BevSegDecoder",
            name="bev_seg",
            use_bce=use_bce,
            bev_size=bev_size,
            task_size=task_map_size,
            grid_quant_scale=map_grid_quant_scale,
            task_weight=10.0,
            head=dict(
                type="DepthwiseSeparableFCNHead",
                input_index=0,
                in_channels=48,
                feat_channels=48,
                num_classes=seg_classes,
                dropout_ratio=0.1,
                num_convs=2,
                bn_kwargs=bn_kwargs,
                int8_output=False,
            ),
            target=dict(
                type="FCNTarget",
            ),
            loss=dict(
                type="CrossEntropyLoss",
                loss_name="seg",
                reduction="mean",
                ignore_index=-1,
                use_sigmoid=use_bce,
                class_weight=2.0 if use_bce else [1.0, 5.0, 5.0, 5.0],
            ),
            decoder=dict(
                type="FCNDecoder",
                upsample_output_scale=1,
                use_bce=use_bce,
                bg_cls=-1,
            ),
        ),
        dict(
            type="BevDetDecoder",
            name="bev_det",
            task_weight=1.0,
            head=dict(
                type="DepthwiseSeparableCenterPointHead",
                in_channels=48,
                tasks=tasks,
                share_conv_channels=48,
                share_conv_num=1,
                common_heads=dict(
                    reg=(2, 2),
                    height=(1, 2),
                    dim=(3, 2),
                    rot=(2, 2),
                    vel=(2, 2),
                ),
                head_conv_channels=48,
                num_heatmap_convs=2,
                final_kernel=3,
            ),
            target=dict(
                type="CenterPointTarget",
                class_names=NuscenesDataset.CLASSES,
                tasks=tasks,
                gaussian_overlap=0.1,
                min_radius=2,
                out_size_factor=1,
                norm_bbox=True,
                max_num=500,
                bbox_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
            ),
            loss_cls=dict(type="GaussianFocalLoss", loss_weight=1.0),
            loss_reg=dict(
                type="L1Loss",
                loss_weight=0.25,
            ),
            decoder=dict(
                type="CenterPointDecoder",
                class_names=NuscenesDataset.CLASSES,
                tasks=tasks,
                bev_size=bev_size,
                out_size_factor=1,
                score_threshold=0.1,
                use_max_pool=True,
                nms_type=[
                    "rotate",
                    "rotate",