tier4 · amadeuszsz · Oct 9, 2025 · Apr 15, 2025 · Apr 17, 2025 · Apr 21, 2025
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -12,7 +12,8 @@
 /projects/FRNet/ [email protected]
 /projects/GLIP/ [email protected]
 /projects/MobileNetv2/ [email protected]
-/projects/SparseConvolution/ [email protected]
+/projects/PTv3/ [email protected]
+/projects/SparseConvolution/ [email protected] [email protected]
 /projects/SwinTransformer/ [email protected]
 /projects/template/ [email protected]
 /projects/TransFusion/ [email protected] [email protected]

@@ -0,0 +1,25 @@
+ARG AWML_BASE_IMAGE="autoware-ml"
+FROM ${AWML_BASE_IMAGE}
+
+# CUDA architectures flash-attn is compiled for. Defaults to "120"; override with
+# `--build-arg FLASH_ATTN_CUDA_ARCHS=...` (semicolon-separated list) to target other GPUs.
+ARG FLASH_ATTN_CUDA_ARCHS="120"
+ENV FLASH_ATTN_CUDA_ARCHS=${FLASH_ATTN_CUDA_ARCHS}
+ENV PYTHONPATH=/workspace/projects:/workspace/projects/PTv3
+
+# pip options apply to the whole command, so they are listed up front instead of
+# being interleaved with the package names.
+RUN python3 -m pip --no-cache-dir install \
+  --no-build-isolation \
+  -f https://data.pyg.org/whl/torch-2.8.0+cu129.html \
+  addict \
+  open3d \
+  flash-attn \
+  regex \
+  sharedarray \
+  spconv-cu120 \
+  tensorboardx \
+  termcolor \
+  torch-scatter \
+  tqdm \
+  yapf==0.40.1
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Pointcept
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,67 @@
+# Point Transformer V3 (PTv3)
+
+PTv3 is a lidar segmentation model.
+AWML's implementation is a port of the [original code](https://github.com/Pointcept/Pointcept) that trims unused parts of the code base and adds support for T4dataset and ONNX export.
+
+## Summary
+
+- ROS package: [Link](https://github.com/autowarefoundation/autoware_universe/pull/10600)
+- Supported datasets
+  - [x] NuScenes
+  - [x] T4dataset
+- Other supported features
+  - [x] ONNX export & TensorRT inference
+
+## Results and models
+
+- TODO
+
+
+## Get started
+### 1. Setup
+
+- This project requires a different docker environment than most other projects.
+
+```sh
+DOCKER_BUILDKIT=1 docker build -t autoware-ml-ptv3 -f projects/PTv3/Dockerfile . --progress=plain
+```
+
+- Run docker
+
+```sh
+docker run -it --rm --gpus '"device=0"' --shm-size=64g --name awml -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-ptv3
+```
+
+### 2. Train
+
+To train the model, use the following commands:
+
+```sh
+python projects/PTv3/tools/train.py --config-file projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py --num-gpus 1
+```
+
+To test the model, use the following commands:
+
+```sh
+python projects/PTv3/tools/test.py --config-file projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py --num-gpus 1 \
+  --options \
+  save_path=data/experiment \
+  weight=work_dirs/ptv3/model/model_best.pth
+```
+
+### 3. Deployment
+
+Export the model:
+
+```sh
+python projects/PTv3/tools/export.py --config-file projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py --num-gpus 1 \
+  --options \
+  save_path=data/experiment \
+  weight=work_dirs/ptv3/model/model_best.pth
+```
+
+This generates a file called `ptv3.onnx`.
+
+## Reference
+
+- [Pointcept's PTv3](https://github.com/Pointcept/Pointcept)
@@ -0,0 +1,39 @@
+weight = None  # path to the model weight file (None: no weight file specified here)
+resume = False  # whether to resume a previous training process
+evaluate = True  # evaluate after each training epoch
+test_only = False  # run the test process only
+
+seed = None  # None: the training process initializes a random seed and records it
+save_path = "work_dirs/ptv3"  # presumably the experiment output directory — TODO confirm
+num_worker = 16  # total number of workers across all GPUs
+batch_size = 16  # total batch size across all GPUs
+batch_size_val = None  # None: auto-adapts to a batch size of 1 per GPU
+batch_size_test = None  # None: auto-adapts to a batch size of 1 per GPU
+epoch = 100  # total number of epochs; number of data loops = epoch // eval_epoch
+eval_epoch = 100  # scheduled total number of evaluation & checkpoint epochs
+clip_grad = 1.0  # gradient clipping: None disables it, a float enables it
+
+sync_bn = False
+enable_amp = False
+empty_cache = False
+empty_cache_per_epoch = False
+find_unused_parameters = False
+
+mix_prob = 0
+param_dicts = None  # per-keyword parameter options, e.g. param_dicts = [dict(keyword="block", lr_scale=0.1)]
+
+# Hooks, each selected by its "type" name
+hooks = [
+    dict(type="CheckpointLoader"),
+    dict(type="IterationTimer", warmup_iter=2),
+    dict(type="InformationWriter"),
+    dict(type="SemSegEvaluator"),
+    dict(type="CheckpointSaver", save_freq=None),
+    dict(type="PreciseEvaluator", test_last=False),
+]
+
+# Trainer, selected by its "type" name
+train = dict(type="DefaultTrainer")
+
+# Tester, selected by its "type" name
+test = dict(type="SemSegTester", verbose=True)
@@ -0,0 +1,215 @@
+_base_ = ["./_base_/default_runtime.py"]  # inherited base config (presumably supplies the runtime defaults — TODO confirm)
+
+# Miscellaneous custom settings for this experiment
+batch_size = 4  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# Model settings: segmentor wrapper, PT-v3m1 backbone and the training losses
+model = dict(
+    type="DefaultSegmentorV2",
+    num_classes=16,  # same as the number of entries in `names` below
+    backbone_out_channels=64,  # equals dec_channels[0] of the backbone
+    backbone=dict(
+        type="PT-v3m1",
+        in_channels=4,  # presumably coord (3) + strength (1), see feat_keys in `data` — TODO confirm
+        order=["z", "z-trans", "hilbert", "hilbert-trans"],
+        stride=(2, 2, 2, 2),
+        # Encoder settings: five entries each
+        enc_depths=(2, 2, 2, 6, 2),
+        enc_channels=(32, 64, 128, 256, 512),
+        enc_num_head=(2, 4, 8, 16, 32),
+        enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+        # Decoder settings: four entries each
+        dec_depths=(2, 2, 2, 2),
+        dec_channels=(64, 64, 128, 256),
+        dec_num_head=(4, 4, 8, 16),
+        dec_patch_size=(1024, 1024, 1024, 1024),
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        attn_drop=0.0,
+        proj_drop=0.0,
+        drop_path=0.3,
+        shuffle_orders=True,
+        pre_norm=True,
+        enable_rpe=False,
+        enable_flash=True,  # NOTE(review): presumably requires the flash-attn package — confirm
+        upcast_attention=False,
+        upcast_softmax=False,
+        cls_mode=False,
+        pdnorm_bn=False,
+        pdnorm_ln=False,
+        pdnorm_decouple=True,
+        pdnorm_adaptive=False,
+        pdnorm_affine=True,
+        pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
+    ),
+    # Two loss terms with equal weight; both ignore label -1, the same value
+    # as the module-level `ignore_index` defined further down in this config.
+    criteria=[
+        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+    ],
+)
+
+# Scheduler and optimizer settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+    type="OneCycleLR",
+    max_lr=[0.002, 0.0002],  # same values as optimizer lr and the param_dicts lr (presumably one per parameter group — TODO confirm)
+    pct_start=0.04,
+    anneal_strategy="cos",
+    div_factor=10.0,
+    final_div_factor=100.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0002)]  # separate lr for the "block" keyword
+
+# Dataset settings shared by the train/val/test entries of `data`
+dataset_type = "NuScenesDataset"
+data_root = "data/nuscenes"
+ignore_index = -1  # forwarded to `data` and to every dataset split below
+# The 16 class names
+names = [
+    "barrier",
+    "bicycle",
+    "bus",
+    "car",
+    "construction_vehicle",
+    "motorcycle",
+    "pedestrian",
+    "traffic_cone",
+    "trailer",
+    "truck",
+    "driveable_surface",
+    "other_flat",
+    "sidewalk",
+    "terrain",
+    "manmade",
+    "vegetation",
+]
+
+# Dataset and transform pipeline definitions for the train, val and test splits.
+data = dict(
+    num_classes=16,
+    ignore_index=ignore_index,
+    names=names,
+    # Training split: augmentation transforms, grid sampling, tensor conversion, key collection.
+    train=dict(
+        type=dataset_type,
+        split="train",
+        data_root=data_root,
+        transform=[
+            # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+            # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+            # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+            dict(type="RandomScale", scale=[0.9, 1.1]),
+            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+            dict(type="RandomFlip", p=0.5),
+            dict(type="RandomJitter", sigma=0.005, clip=0.02),
+            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+            dict(
+                type="GridSample",
+                grid_size=0.05,
+                hash_type="fnv",
+                mode="train",
+                keys=("coord", "strength", "segment"),
+                return_grid_coord=True,
+            ),
+            # dict(type="SphereCrop", point_max=1000000, mode="random"),
+            # dict(type="CenterShift", apply_z=False),
+            dict(type="ToTensor"),
+            dict(
+                type="Collect",
+                keys=("coord", "grid_coord", "segment"),
+                feat_keys=("coord", "strength"),
+            ),
+        ],
+        test_mode=False,
+        ignore_index=ignore_index,
+    ),
+    # Validation split: same sampling and collection as training, without the random augmentations.
+    val=dict(
+        type=dataset_type,
+        split="val",
+        data_root=data_root,
+        transform=[
+            # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
+            dict(
+                type="GridSample",
+                grid_size=0.05,
+                hash_type="fnv",
+                mode="train",  # NOTE(review): "train" mode is also used for validation — confirm this is intended
+                keys=("coord", "strength", "segment"),
+                return_grid_coord=True,
+            ),
+            # dict(type="SphereCrop", point_max=1000000, mode='center'),
+            dict(type="ToTensor"),
+            dict(
+                type="Collect",
+                keys=("coord", "grid_coord", "segment"),
+                feat_keys=("coord", "strength"),
+            ),
+        ],
+        test_mode=False,
+        ignore_index=ignore_index,
+    ),
+    # Test split: reads the same "val" split as the validation entry above.
+    test=dict(
+        type=dataset_type,
+        split="val",
+        data_root=data_root,
+        transform=[
+            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+            dict(
+                type="GridSample",
+                grid_size=0.025,  # half the 0.05 grid size used by the voxelize step below
+                hash_type="fnv",
+                mode="train",  # NOTE(review): "train" mode inside the test pipeline — confirm this is intended
+                keys=("coord", "strength", "segment"),
+                return_inverse=True,
+            ),
+        ],
+        test_mode=True,
+        test_cfg=dict(
+            voxelize=dict(
+                type="GridSample",
+                grid_size=0.05,
+                hash_type="fnv",
+                mode="test",
+                return_grid_coord=True,
+                keys=("coord", "strength"),
+            ),
+            crop=None,
+            post_transform=[
+                dict(type="ToTensor"),
+                dict(
+                    type="Collect",
+                    keys=("coord", "grid_coord", "index"),
+                    feat_keys=("coord", "strength"),
+                ),
+            ],
+            # Ten transform lists: five fixed scales (0.9, 0.95, 1, 1.05, 1.1),
+            # first without and then with an always-applied flip (p=1).
+            aug_transform=[
+                [dict(type="RandomScale", scale=[0.9, 0.9])],
+                [dict(type="RandomScale", scale=[0.95, 0.95])],
+                [dict(type="RandomScale", scale=[1, 1])],
+                [dict(type="RandomScale", scale=[1.05, 1.05])],
+                [dict(type="RandomScale", scale=[1.1, 1.1])],
+                [
+                    dict(type="RandomScale", scale=[0.9, 0.9]),
+                    dict(type="RandomFlip", p=1),
+                ],
+                [
+                    dict(type="RandomScale", scale=[0.95, 0.95]),
+                    dict(type="RandomFlip", p=1),
+                ],
+                [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+                [
+                    dict(type="RandomScale", scale=[1.05, 1.05]),
+                    dict(type="RandomFlip", p=1),
+                ],
+                [
+                    dict(type="RandomScale", scale=[1.1, 1.1]),
+                    dict(type="RandomFlip", p=1),
+                ],
+            ],
+        ),
+        ignore_index=ignore_index,
+    ),
+)