From ed628155231174daed41bbd26a4df11998874eb9 Mon Sep 17 00:00:00 2001 From: Fzilan Date: Thu, 10 Jul 2025 19:34:15 +0800 Subject: [PATCH] add copyright --- examples/seg/deeplabv3/deeplabv3.py | 5 +- .../deeplabv3/preprocess/build_seg_data.py | 2 + .../seg/deeplabv3/preprocess/get_data_list.py | 2 + mindcv/data/auto_augment.py | 10 +++- mindcv/data/dataset_factory.py | 2 +- mindcv/data/distributed_sampler.py | 3 +- mindcv/data/loader.py | 2 +- mindcv/data/mixup.py | 4 ++ mindcv/data/transforms_factory.py | 5 ++ mindcv/models/bit.py | 1 + mindcv/models/cait.py | 20 ++++++-- mindcv/models/cmt.py | 4 ++ mindcv/models/coat.py | 10 ++++ mindcv/models/convit.py | 31 ++++++++++-- mindcv/models/convnext.py | 50 +++++++++++++++++-- mindcv/models/crossvit.py | 30 +++++++++-- mindcv/models/densenet.py | 2 +- mindcv/models/dpn.py | 14 +++++- mindcv/models/edgenext.py | 15 +++++- mindcv/models/efficientnet.py | 11 +++- mindcv/models/ghostnet.py | 2 +- mindcv/models/halonet.py | 1 + mindcv/models/hrnet.py | 11 +++- mindcv/models/inceptionv3.py | 4 ++ mindcv/models/inceptionv4.py | 5 ++ mindcv/models/layers/compatibility.py | 2 + mindcv/models/layers/conv_norm_act.py | 4 +- mindcv/models/layers/drop_path.py | 3 ++ mindcv/models/layers/format.py | 2 + mindcv/models/layers/mlp.py | 1 + mindcv/models/layers/patch_dropout.py | 2 + mindcv/models/layers/patch_embed.py | 8 ++- mindcv/models/layers/pos_embed.py | 4 +- mindcv/models/layers/selective_kernel.py | 1 + mindcv/models/mae.py | 5 ++ mindcv/models/mixnet.py | 1 + mindcv/models/mlpmixer.py | 9 +++- mindcv/models/mnasnet.py | 1 + mindcv/models/mobilenetv3.py | 13 +++-- mindcv/models/mobilevit.py | 25 ++++++++-- mindcv/models/nasnet.py | 2 +- mindcv/models/pit.py | 21 ++++++-- mindcv/models/pnasnet.py | 2 +- mindcv/models/poolformer.py | 1 + mindcv/models/pvt.py | 13 ++++- mindcv/models/pvtv2.py | 22 +++++++- mindcv/models/regnet.py | 7 ++- mindcv/models/repmlp.py | 1 + mindcv/models/repvgg.py | 1 + mindcv/models/res2net.py | 2 +- 
mindcv/models/resnest.py | 13 ++++- mindcv/models/resnet.py | 16 ++++-- mindcv/models/resnetv2.py | 39 +++++++++++++-- mindcv/models/rexnet.py | 21 ++++++-- mindcv/models/senet.py | 2 +- mindcv/models/shufflenetv2.py | 1 + mindcv/models/sknet.py | 18 +++++-- mindcv/models/squeezenet.py | 1 + mindcv/models/swintransformer.py | 24 ++++++++- mindcv/models/swintransformerv2.py | 22 ++++++-- mindcv/models/vgg.py | 15 ++++-- mindcv/models/visformer.py | 16 ++++-- mindcv/models/vit.py | 32 +++++++++++- mindcv/models/volo.py | 31 ++++++++++-- mindcv/models/xception.py | 8 ++- mindcv/models/xcit.py | 18 +++++-- 66 files changed, 594 insertions(+), 82 deletions(-) diff --git a/examples/seg/deeplabv3/deeplabv3.py b/examples/seg/deeplabv3/deeplabv3.py index 2d2f358d5..d3a15c0f9 100644 --- a/examples/seg/deeplabv3/deeplabv3.py +++ b/examples/seg/deeplabv3/deeplabv3.py @@ -1,4 +1,7 @@ -"""DeeplabV3, DeeplabV3+ implement with replaceable backbones""" +""" +DeeplabV3, DeeplabV3+ implement with replaceable backbones +Adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/deeplabv3.py +""" from typing import List diff --git a/examples/seg/deeplabv3/preprocess/build_seg_data.py b/examples/seg/deeplabv3/preprocess/build_seg_data.py index 1147e6984..4a8deb861 100644 --- a/examples/seg/deeplabv3/preprocess/build_seg_data.py +++ b/examples/seg/deeplabv3/preprocess/build_seg_data.py @@ -1,3 +1,5 @@ +# Copy from https://github.com/mindspore-ai/models/blob/master/official/cv/DeepLabv3/src/data/build_seg_data.py + import argparse import os diff --git a/examples/seg/deeplabv3/preprocess/get_data_list.py b/examples/seg/deeplabv3/preprocess/get_data_list.py index bc7c11786..4412c03be 100644 --- a/examples/seg/deeplabv3/preprocess/get_data_list.py +++ b/examples/seg/deeplabv3/preprocess/get_data_list.py @@ -1,3 +1,5 @@ +# Copy from https://github.com/mindspore-ai/models/blob/master/official/cv/DeepLabv3/src/data/get_dataset_lst.py + import argparse import os 
diff --git a/mindcv/data/auto_augment.py b/mindcv/data/auto_augment.py index c74829bcd..831228038 100644 --- a/mindcv/data/auto_augment.py +++ b/mindcv/data/auto_augment.py @@ -1,8 +1,14 @@ """ -AutoAugment and RandAugment for mindspore. +Copyright 2019, Ross Wightman + +Modifications made by MindSpore team in 2023 to support the MindSpore framework. Adapted from: - https://github.com/rwightman/pytorch-image-models/blob/main/timm/data/auto_augment.py + https://github.com/rwightman/pytorch-image-models/blob/main/timm/data/auto_augment.py. + + + +AutoAugment and RandAugment for mindspore. Papers: AutoAugment: Learning Augmentation Policies from Data - https://arxiv.org/abs/1805.09501 diff --git a/mindcv/data/dataset_factory.py b/mindcv/data/dataset_factory.py index 5f630face..2d0d81394 100644 --- a/mindcv/data/dataset_factory.py +++ b/mindcv/data/dataset_factory.py @@ -1,5 +1,5 @@ """ -Create dataset by name +Create dataset by name on MindSpore """ import logging diff --git a/mindcv/data/distributed_sampler.py b/mindcv/data/distributed_sampler.py index 21deec0c0..f51633e72 100644 --- a/mindcv/data/distributed_sampler.py +++ b/mindcv/data/distributed_sampler.py @@ -1,4 +1,5 @@ -""" distributed sampler """ +""" MindSpore Distributed Sampler """ + import logging import math diff --git a/mindcv/data/loader.py b/mindcv/data/loader.py index 0fb63dec4..10b7a1d37 100644 --- a/mindcv/data/loader.py +++ b/mindcv/data/loader.py @@ -1,5 +1,5 @@ """ -Create dataloader +Create dataloader on MindSpore """ import inspect diff --git a/mindcv/data/mixup.py b/mindcv/data/mixup.py index d6552498a..44477ebbe 100644 --- a/mindcv/data/mixup.py +++ b/mindcv/data/mixup.py @@ -9,6 +9,10 @@ CutMix: https://github.com/clovaai/CutMix-PyTorch Hacked together by / Copyright 2020 Ross Wightman + +Modifications made to support the MindSpore framework. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/data/mixup.py + """ import numpy as np diff --git a/mindcv/data/transforms_factory.py b/mindcv/data/transforms_factory.py index d98e8f86f..4d3938c74 100644 --- a/mindcv/data/transforms_factory.py +++ b/mindcv/data/transforms_factory.py @@ -1,5 +1,10 @@ """ Transform operation list + +Hacked together by / Copyright 2019, Ross Wightman +Modifications made to support the MindSpore framework. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/data/transforms_factory.py + """ import math diff --git a/mindcv/models/bit.py b/mindcv/models/bit.py index 2a0fd3a76..11a5cb5b7 100644 --- a/mindcv/models/bit.py +++ b/mindcv/models/bit.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `BiT_ResNet`. Refer to Big Transfer (BiT): General Visual Representation Learning. +Adapted from https://github.com/google-research/big_transfer. """ from typing import List, Optional, Type, Union diff --git a/mindcv/models/cait.py b/mindcv/models/cait.py index 40ca6ec13..3c60ad8ec 100644 --- a/mindcv/models/cait.py +++ b/mindcv/models/cait.py @@ -1,7 +1,21 @@ +""" Class-Attention in Image Transformers (CaiT) + +Paper: 'Going deeper with Image Transformers' - https://arxiv.org/abs/2103.17239 + +Original code and weights from https://github.com/facebookresearch/deit, copyright below + +Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/cait.py """ -MindSpore implementation of `CaiT`. -Refer to Going deeper with Image Transformers. -""" + +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. 
+ from functools import partial diff --git a/mindcv/models/cmt.py b/mindcv/models/cmt.py index f2b16cfa3..0b5e654f3 100644 --- a/mindcv/models/cmt.py +++ b/mindcv/models/cmt.py @@ -1,3 +1,7 @@ +""" +MindSpore implementation of `CMT`. +Adapted from https://github.com/junjie18/CMT +""" import numpy as np import mindspore diff --git a/mindcv/models/coat.py b/mindcv/models/coat.py index 09645d333..92faaca0d 100644 --- a/mindcv/models/coat.py +++ b/mindcv/models/coat.py @@ -1,6 +1,16 @@ """ CoaT architecture. +Paper: Co-Scale Conv-Attentional Image Transformers - https://arxiv.org/abs/2104.06399 + +Official CoaT code at: https://github.com/mlpc-ucsd/CoaT Modified from timm/models/vision_transformer.py + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/coat.py + """ from typing import Union diff --git a/mindcv/models/convit.py b/mindcv/models/convit.py index 45884b9c1..82edebdad 100644 --- a/mindcv/models/convit.py +++ b/mindcv/models/convit.py @@ -1,7 +1,32 @@ +""" ConViT Model + +@article{d2021convit, + title={ConViT: Improving Vision Transformers with Soft Convolutional Inductive Biases}, + author={d'Ascoli, St{\'e}phane and Touvron, Hugo and Leavitt, Matthew and Morcos, + Ari and Biroli, Giulio and Sagun, Levent}, + journal={arXiv preprint arXiv:2103.10697}, + year={2021} +} + +Paper link: https://arxiv.org/abs/2103.10697 +Original code: https://github.com/facebookresearch/convit, original copyright below + +Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. 
+This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/convit.py + """ -MindSpore implementation of `ConViT`. -Refer to ConViT: Improving Vision Transformers with Soft Convolutional Inductive Biases -""" + +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the CC-by-NC license found in the +# LICENSE file in the root directory of this source tree. +# import numpy as np diff --git a/mindcv/models/convnext.py b/mindcv/models/convnext.py index f62eca650..568c305fd 100644 --- a/mindcv/models/convnext.py +++ b/mindcv/models/convnext.py @@ -1,8 +1,50 @@ +""" ConvNeXt + +Papers: +* `A ConvNet for the 2020s` - https://arxiv.org/pdf/2201.03545.pdf +@Article{liu2022convnet, + author = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining Xie}, + title = {A ConvNet for the 2020s}, + journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2022}, +} + +* `ConvNeXt-V2 - Co-designing and Scaling ConvNets with Masked Autoencoders` - https://arxiv.org/abs/2301.00808 +@article{Woo2023ConvNeXtV2, + title={ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders}, + author={Sanghyun Woo, Shoubhik Debnath, Ronghang Hu, Xinlei Chen, Zhuang Liu, In So Kweon and Saining Xie}, + year={2023}, + journal={arXiv preprint arXiv:2301.00808}, +} + +Original code and weights from: +* https://github.com/facebookresearch/ConvNeXt, original copyright below +* https://github.com/facebookresearch/ConvNeXt-V2, original copyright below + +Model defs atto, femto, pico, nano and _ols / _hnf variants are timm originals. 
+ +Modifications and additions for timm hacked together by / Copyright 2022, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/convnext.py + """ -MindSpore implementation of `ConvNeXt` and `ConvNeXt V2`. -Refer to: A ConvNet for the 2020s - ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders -""" +# ConvNeXt +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# This source code is licensed under the MIT license + +# ConvNeXt-V2 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree (Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)) +# No code was used directly from ConvNeXt-V2, however the weights are CC BY-NC 4.0 so beware if using commercially. 
+ + from typing import List, Tuple import numpy as np diff --git a/mindcv/models/crossvit.py b/mindcv/models/crossvit.py index 3a00711e1..8f880fe0f 100644 --- a/mindcv/models/crossvit.py +++ b/mindcv/models/crossvit.py @@ -1,7 +1,31 @@ +""" CrossViT Model + +@inproceedings{ + chen2021crossvit, + title={{CrossViT: Cross-Attention Multi-Scale Vision Transformer for Image Classification}}, + author={Chun-Fu (Richard) Chen and Quanfu Fan and Rameswar Panda}, + booktitle={International Conference on Computer Vision (ICCV)}, + year={2021} +} + +Paper link: https://arxiv.org/abs/2103.14899 +Original code: https://github.com/IBM/CrossViT/blob/main/models/crossvit.py + +NOTE: model names have been renamed from originals to represent actual input res all *_224 -> *_240 and *_384 -> *_408 + +Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman +Modified from Timm. https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/crossvit.py + """ -MindSpore implementation of `crossvit`. -Refer to crossvit: Cross-Attention Multi-Scale Vision Transformer for Image Classification -""" + +# Copyright IBM All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 import numpy as np diff --git a/mindcv/models/densenet.py b/mindcv/models/densenet.py index f8b294266..19e48426f 100644 --- a/mindcv/models/densenet.py +++ b/mindcv/models/densenet.py @@ -1,6 +1,6 @@ """ MindSpore implementation of `DenseNet`. 
-Refer to: Densely Connected Convolutional Networks +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/densenet.py """ import math diff --git a/mindcv/models/dpn.py b/mindcv/models/dpn.py index 24aa00bc3..4bb1957a2 100644 --- a/mindcv/models/dpn.py +++ b/mindcv/models/dpn.py @@ -1,6 +1,16 @@ """ -MindSpore implementation of `DPN`. -Refer to: Dual Path Networks +Based on original MXNet implementation https://github.com/cypw/DPNs with +many ideas from another PyTorch implementation https://github.com/oyam/pytorch-DPNs. + +This implementation is compatible with the pretrained weights from cypw's MXNet implementation. + +Hacked together by / Copyright 2020 Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/dpn.py """ import math diff --git a/mindcv/models/edgenext.py b/mindcv/models/edgenext.py index 82ed070de..59cf758af 100644 --- a/mindcv/models/edgenext.py +++ b/mindcv/models/edgenext.py @@ -1,6 +1,17 @@ """ -MindSpore implementation of `edgenext`. -Refer to EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications. +Paper: `EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications` + - https://arxiv.org/abs/2206.10589 + +Original code and weights from https://github.com/mmaaz60/EdgeNeXt + +Modifications and additions for timm by / Copyright 2022, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/edgenext.py + """ import math diff --git a/mindcv/models/efficientnet.py b/mindcv/models/efficientnet.py index def05d045..083374dcc 100644 --- a/mindcv/models/efficientnet.py +++ b/mindcv/models/efficientnet.py @@ -1,4 +1,13 @@ -"""EfficientNet Architecture.""" +"""EfficientNet Architecture. +Hacked together by / Copyright 2019, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/efficientnet.py + +""" import copy import math diff --git a/mindcv/models/ghostnet.py b/mindcv/models/ghostnet.py index 87bab1f64..9e52f1d23 100644 --- a/mindcv/models/ghostnet.py +++ b/mindcv/models/ghostnet.py @@ -1,5 +1,5 @@ """MindSpore implementation of `GhostNet`. -Refer to GhostNet: More Features from Cheap Operations. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/ghostnet.py """ import math diff --git a/mindcv/models/halonet.py b/mindcv/models/halonet.py index c32495e70..8d571fb75 100644 --- a/mindcv/models/halonet.py +++ b/mindcv/models/halonet.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `HaloNet`. Refer to Scaling Local Self-Attention for Parameter Effificient Visual Backbones. +Adapted from https://github.com/maurbe/halo-net """ import mindspore as ms import mindspore.common.initializer as init diff --git a/mindcv/models/hrnet.py b/mindcv/models/hrnet.py index 1095c47bb..02efa32cc 100644 --- a/mindcv/models/hrnet.py +++ b/mindcv/models/hrnet.py @@ -1,6 +1,15 @@ """ +Copied from https://github.com/HRNet/HRNet-Image-Classification + +Original header: + Copyright (c) Microsoft + Licensed under the MIT License. 
+ Written by Bin Xiao (Bin.Xiao@microsoft.com) + Modified by Ke Sun (sunk@mail.ustc.edu.cn) + +------------------------------------------------------------------------ MindSpore implementation of `HRNet`. -Refer to Deep High-Resolution Representation Learning for Visual Recognition +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/hrnet.py """ from typing import Any, Dict, List, Optional, Tuple, Type, Union diff --git a/mindcv/models/inceptionv3.py b/mindcv/models/inceptionv3.py index 8e15c5dfa..7dc75fdea 100644 --- a/mindcv/models/inceptionv3.py +++ b/mindcv/models/inceptionv3.py @@ -1,6 +1,10 @@ """ +Originally from torchvision Inception3 model +Licensed BSD-Clause 3 https://github.com/pytorch/vision/blob/master/LICENSE + MindSpore implementation of `InceptionV3`. Refer to Rethinking the Inception Architecture for Computer Vision. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/inception_v3.py """ from typing import Tuple, Union diff --git a/mindcv/models/inceptionv4.py b/mindcv/models/inceptionv4.py index 54bf81e34..5cb995e4b 100644 --- a/mindcv/models/inceptionv4.py +++ b/mindcv/models/inceptionv4.py @@ -1,6 +1,11 @@ """ +Sourced from https://github.com/Cadene/tensorflow-model-zoo.torch (MIT License) which is +based upon Google's Tensorflow implementation and pretrained weights (Apache 2.0 License) + + MindSpore implementation of `InceptionV4`. Refer to Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/inception_v4.py """ from typing import Tuple, Union diff --git a/mindcv/models/layers/compatibility.py b/mindcv/models/layers/compatibility.py index 8aecbae77..64a6302ca 100644 --- a/mindcv/models/layers/compatibility.py +++ b/mindcv/models/layers/compatibility.py @@ -1,3 +1,5 @@ +# Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers + import inspect import mindspore as ms diff --git a/mindcv/models/layers/conv_norm_act.py b/mindcv/models/layers/conv_norm_act.py index e141affc3..a22e023e9 100644 --- a/mindcv/models/layers/conv_norm_act.py +++ b/mindcv/models/layers/conv_norm_act.py @@ -1,4 +1,6 @@ -""" Conv2d + BN + Act""" +""" Conv2d + BN + Act +Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers +""" from typing import Optional from mindspore import nn diff --git a/mindcv/models/layers/drop_path.py b/mindcv/models/layers/drop_path.py index ea0374734..bd547d14c 100644 --- a/mindcv/models/layers/drop_path.py +++ b/mindcv/models/layers/drop_path.py @@ -2,6 +2,9 @@ Mindspore implementations of DropPath (Stochastic Depth) regularization layers. 
Papers: Deep Networks with Stochastic Depth (https://arxiv.org/abs/1603.09382) + +Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers + """ from mindspore import Tensor, nn, ops from mindspore.numpy import ones diff --git a/mindcv/models/layers/format.py b/mindcv/models/layers/format.py index 058a74517..1d87f9125 100644 --- a/mindcv/models/layers/format.py +++ b/mindcv/models/layers/format.py @@ -1,3 +1,5 @@ +# Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers + from enum import Enum from typing import Union diff --git a/mindcv/models/layers/mlp.py b/mindcv/models/layers/mlp.py index 7da27a4a2..efac49463 100644 --- a/mindcv/models/layers/mlp.py +++ b/mindcv/models/layers/mlp.py @@ -1,4 +1,5 @@ """ MLP module w/ dropout and configurable activation layer +Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers """ from typing import Optional diff --git a/mindcv/models/layers/patch_dropout.py b/mindcv/models/layers/patch_dropout.py index ad854dbfc..3540c5b5a 100644 --- a/mindcv/models/layers/patch_dropout.py +++ b/mindcv/models/layers/patch_dropout.py @@ -1,3 +1,5 @@ +# Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers + import numpy as np import mindspore as ms diff --git a/mindcv/models/layers/patch_embed.py b/mindcv/models/layers/patch_embed.py index 661e07890..84e02a123 100644 --- a/mindcv/models/layers/patch_embed.py +++ b/mindcv/models/layers/patch_embed.py @@ -1,5 +1,11 @@ """ Image to Patch Embedding using Conv2d -A convolution based approach to patchifying a 2D image w/ embedding projection.""" +A convolution based approach to patchifying a 2D image w/ embedding projection. 
+Based on code in: + * https://github.com/google-research/vision_transformer + * https://github.com/google-research/big_vision/tree/main/big_vision + +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/patch_embed.py +""" from typing import Optional from mindspore import Tensor, nn, ops diff --git a/mindcv/models/layers/pos_embed.py b/mindcv/models/layers/pos_embed.py index ba4548580..292da21b7 100644 --- a/mindcv/models/layers/pos_embed.py +++ b/mindcv/models/layers/pos_embed.py @@ -1,4 +1,6 @@ -"""positional embedding""" +"""positional embedding +Adapted from https://github.com/huggingface/pytorch-image-models/tree/main/timm/layers +""" import math from typing import List, Optional, Tuple diff --git a/mindcv/models/layers/selective_kernel.py b/mindcv/models/layers/selective_kernel.py index ddf6ebcad..66d804c4b 100644 --- a/mindcv/models/layers/selective_kernel.py +++ b/mindcv/models/layers/selective_kernel.py @@ -1,5 +1,6 @@ """ Selective Kernel Convolution/Attention Paper: Selective Kernel Networks (https://arxiv.org/abs/1903.06586) +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/selective_kernel.py """ from typing import List, Optional, Union diff --git a/mindcv/models/mae.py b/mindcv/models/mae.py index 4a5cf887e..bce03364e 100644 --- a/mindcv/models/mae.py +++ b/mindcv/models/mae.py @@ -1,3 +1,8 @@ +""" +MindSpore implementation of `MAE`. +Adapted from https://github.com/facebookresearch/mae. +""" + from functools import partial from typing import Callable, Optional diff --git a/mindcv/models/mixnet.py b/mindcv/models/mixnet.py index 9d9c35347..8efd8fbe7 100644 --- a/mindcv/models/mixnet.py +++ b/mindcv/models/mixnet.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `MixNet`. Refer to MixConv: Mixed Depthwise Convolutional Kernels +Adapted from https://github.com/TUMFTM/MixNet. 
""" import math diff --git a/mindcv/models/mlpmixer.py b/mindcv/models/mlpmixer.py index 22e0ff7b6..90374a1de 100644 --- a/mindcv/models/mlpmixer.py +++ b/mindcv/models/mlpmixer.py @@ -1,6 +1,11 @@ """ -MindSpore implementation of `MLP-Mixer`. -Refer to MLP-Mixer: An all-MLP Architecture for Vision. +Hacked together by / Copyright 2021 Ross Wightman + +-------------------------------------------------- +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/mlp_mixer.py """ import mindspore.nn as nn diff --git a/mindcv/models/mnasnet.py b/mindcv/models/mnasnet.py index f77ce0ffd..fe38aa64c 100644 --- a/mindcv/models/mnasnet.py +++ b/mindcv/models/mnasnet.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `MnasNet`. Refer to MnasNet: Platform-Aware Neural Architecture Search for Mobile. +Adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/mnasnet.py """ from typing import List diff --git a/mindcv/models/mobilenetv3.py b/mindcv/models/mobilenetv3.py index 6d911d4e8..5d59f8bd7 100644 --- a/mindcv/models/mobilenetv3.py +++ b/mindcv/models/mobilenetv3.py @@ -1,6 +1,13 @@ -""" -MindSpore implementation of `MobileNetV3`. -Refer to Searching for MobileNetV3. +""" MobileNet V3 +Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244 + +Hacked together by / Copyright 2019, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/mobilenetv3.py """ import math diff --git a/mindcv/models/mobilevit.py b/mindcv/models/mobilevit.py index bd258665a..37862620e 100644 --- a/mindcv/models/mobilevit.py +++ b/mindcv/models/mobilevit.py @@ -1,7 +1,26 @@ +""" MobileViT + +Paper: +V1: `MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer` + - https://arxiv.org/abs/2110.02178 +V2: `Separable Self-attention for Mobile Vision Transformers` + - https://arxiv.org/abs/2206.02680 + +MobileVitBlock and checkpoints adapted from https://github.com/apple/ml-cvnets (original copyright below) +License: https://github.com/apple/ml-cvnets/blob/main/LICENSE (Apple open source) + +Rest of code, ByobNet, and Transformer block hacked together by / Copyright 2022, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/mobilevit.py """ -MindSpore implementation of `MobileViT`. -Refer to MobileViT:Light-weight, General-purpose, and Mobile-friendly Vision Transformer. -""" +# +# For licensing see accompanying LICENSE file. +# Copyright (C) 2020 Apple Inc. All Rights Reserved. +# import math from typing import Dict, Optional, Tuple, Union diff --git a/mindcv/models/nasnet.py b/mindcv/models/nasnet.py index 97c22dbe1..baba52bce 100644 --- a/mindcv/models/nasnet.py +++ b/mindcv/models/nasnet.py @@ -1,6 +1,6 @@ """ MindSpore implementation of `NasNet`. 
-Refer to: Learning Transferable Architectures for Scalable Image Recognition +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/nasnet.py """ import math diff --git a/mindcv/models/pit.py b/mindcv/models/pit.py index 736940e9d..ce37961e1 100644 --- a/mindcv/models/pit.py +++ b/mindcv/models/pit.py @@ -1,7 +1,22 @@ +""" Pooling-based Vision Transformer (PiT) in PyTorch + +A PyTorch implement of Pooling-based Vision Transformers as described in +'Rethinking Spatial Dimensions of Vision Transformers' - https://arxiv.org/abs/2103.16302 + +This code was adapted from the original version at https://github.com/naver-ai/pit, original copyright below. + +Modifications for timm by / Copyright 2020 Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/pit.py + """ -MindSpore implementation of `PiT`. -Refer to Rethinking Spatial Dimensions of Vision Transformers. -""" +# PiT +# Copyright 2021-present NAVER Corp. +# Apache License v2.0 import math from typing import List diff --git a/mindcv/models/pnasnet.py b/mindcv/models/pnasnet.py index fc78484b7..ddf698c69 100644 --- a/mindcv/models/pnasnet.py +++ b/mindcv/models/pnasnet.py @@ -1,6 +1,6 @@ """ MindSpore implementation of pnasnet. -Refer to Progressive Neural Architecture Search. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/pnasnet.py """ import math diff --git a/mindcv/models/poolformer.py b/mindcv/models/poolformer.py index 1e0d2ecd7..4374600aa 100644 --- a/mindcv/models/poolformer.py +++ b/mindcv/models/poolformer.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `poolformer`. Refer to PoolFormer: MetaFormer Is Actually What You Need for Vision. 
+Adapted from https://github.com/sail-sg/poolformer. """ import collections.abc diff --git a/mindcv/models/pvt.py b/mindcv/models/pvt.py index 5338b80bb..7a562c410 100644 --- a/mindcv/models/pvt.py +++ b/mindcv/models/pvt.py @@ -1,6 +1,15 @@ """ -MindSpore implementation of `PVT`. -Refer to PVT: Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions +@inproceedings{wang2021pyramid, + title={Pyramid vision transformer: A versatile backbone for dense prediction without convolutions}, + author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, + Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={568--578}, + year={2021} +} +Based on Apache 2.0 licensed code at https://github.com/whai362/PVT + +Modified for use with the MindSpore framework. """ import math from functools import partial diff --git a/mindcv/models/pvtv2.py b/mindcv/models/pvtv2.py index c4091b7e8..b92b0586f 100644 --- a/mindcv/models/pvtv2.py +++ b/mindcv/models/pvtv2.py @@ -1,6 +1,24 @@ """ -MindSpore implementation of `PVTv2`. -Refer to PVTv2: PVTv2: Improved Baselines with Pyramid Vision Transformer +@misc{wang2021pvtv2, + title={PVTv2: Improved Baselines with Pyramid Vision Transformer}, + author={Wenhai Wang and Enze Xie and Xiang Li and Deng-Ping Fan and Kaitao Song and Ding Liang and + Tong Lu and Ping Luo and Ling Shao}, + year={2021}, + eprint={2106.13797}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +Based on Apache 2.0 licensed code at https://github.com/whai362/PVT + +Modifications and timm support by / Copyright 2022, Ross Wightman + +------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/pvt_v2.py + """ import math from functools import partial diff --git a/mindcv/models/regnet.py b/mindcv/models/regnet.py index 5f42c5812..92010ce50 100644 --- a/mindcv/models/regnet.py +++ b/mindcv/models/regnet.py @@ -1,6 +1,11 @@ """ +Hacked together by / Copyright 2020 Ross Wightman + +------------------------------------------------------------------------ MindSpore implementation of `RegNet`. -Refer to: Designing Network Design Spaces +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/regnet.py + """ import math diff --git a/mindcv/models/repmlp.py b/mindcv/models/repmlp.py index 0fa9bd6e1..741133f1a 100644 --- a/mindcv/models/repmlp.py +++ b/mindcv/models/repmlp.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `RepMLPNet`. Refer to RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality. +Adapted from https://github.com/DingXiaoH/RepMLP """ from collections import OrderedDict diff --git a/mindcv/models/repvgg.py b/mindcv/models/repvgg.py index 6aa89a3a3..c271bcf14 100644 --- a/mindcv/models/repvgg.py +++ b/mindcv/models/repvgg.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `RepVGG`. Refer to RepVGG: Making VGG_style ConvNets Great Again +Adapted from https://github.com/DingXiaoH/RepVGG """ import copy diff --git a/mindcv/models/res2net.py b/mindcv/models/res2net.py index 54a9990b9..e0872ae73 100644 --- a/mindcv/models/res2net.py +++ b/mindcv/models/res2net.py @@ -1,6 +1,6 @@ """ MindSpore implementation of `Res2Net`. -Refer to Res2Net: A New Multi-scale Backbone Architecture. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/res2net.py """ import math diff --git a/mindcv/models/resnest.py b/mindcv/models/resnest.py index 6a28e30e3..c939d5f9a 100644 --- a/mindcv/models/resnest.py +++ b/mindcv/models/resnest.py @@ -1,6 +1,15 @@ -""" +"""ResNeSt Models + +Paper: `ResNeSt: Split-Attention Networks` - https://arxiv.org/abs/2004.08955 + +Adapted from original PyTorch impl w/ weights at https://github.com/zhanghang1989/ResNeSt by Hang Zhang + +Modified for torchscript compat, and consistency with timm by Ross Wightman + +------------------------------------------------------------------------ MindSpore implementation of `ResNeSt`. -Refer to ResNeSt: Split-Attention Networks. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/resnest.py """ from typing import List, Optional, Type diff --git a/mindcv/models/resnet.py b/mindcv/models/resnet.py index 5649af875..df3abddae 100644 --- a/mindcv/models/resnet.py +++ b/mindcv/models/resnet.py @@ -1,6 +1,16 @@ -""" -MindSpore implementation of `ResNet`. -Refer to Deep Residual Learning for Image Recognition. +"""ResNet +This started as a copy of https://github.com/pytorch/vision 'resnet.py' (BSD-3-Clause) with +additional dropout and dynamic global avg/max pool. + +ResNeXt, SE-ResNeXt, SENet, and MXNet Gluon stem/downsample variants, tiered stems added by Ross Wightman + +Copyright 2019, Ross Wightman + +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/resnet.py + """ from typing import List, Optional, Type, Union diff --git a/mindcv/models/resnetv2.py b/mindcv/models/resnetv2.py index 144b1d580..284cda760 100644 --- a/mindcv/models/resnetv2.py +++ b/mindcv/models/resnetv2.py @@ -1,7 +1,40 @@ +"""Pre-Activation ResNet v2 with GroupNorm and Weight Standardization. + +A PyTorch implementation of ResNetV2 adapted from the Google Big-Transfer (BiT) source code +at https://github.com/google-research/big_transfer to match timm interfaces. The BiT weights have +been included here as pretrained models from their original .NPZ checkpoints. + +Additionally, supports non pre-activation bottleneck for use as a backbone for Vision Transformers (ViT) and +extra padding support to allow porting of official Hybrid ResNet pretrained weights from +https://github.com/google-research/vision_transformer + +Thanks to the Google team for the above two repositories and associated papers: +* Big Transfer (BiT): General Visual Representation Learning - https://arxiv.org/abs/1912.11370 +* An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale - https://arxiv.org/abs/2010.11929 +* Knowledge distillation: A good teacher is patient and consistent - https://arxiv.org/abs/2106.05237 + +Original copyright of Google code below, modifications by Ross Wightman, Copyright 2020. + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/resnetv2.py + """ -MindSpore implementation of `ResNetV2`. -Refer to Identity Mappings in Deep Residual Networks. 
-""" +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import Optional diff --git a/mindcv/models/rexnet.py b/mindcv/models/rexnet.py index ba2850649..0430f65f5 100644 --- a/mindcv/models/rexnet.py +++ b/mindcv/models/rexnet.py @@ -1,7 +1,22 @@ +""" ReXNet + +`ReXNet: Diminishing Representational Bottleneck on Convolutional Neural Network` - +https://arxiv.org/abs/2007.00992 + +Adapted from original impl at https://github.com/clovaai/rexnet +Copyright (c) 2020-present NAVER Corp. MIT license + +Changes for timm, feature extraction, and rounded channel variant hacked together by Ross Wightman +Copyright 2020 Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/rexnet.py + """ -MindSpore implementation of `ReXNet`. -Refer to ReXNet: Rethinking Channel Dimensions for Efficient Model Design. -""" + import math from math import ceil from typing import Any diff --git a/mindcv/models/senet.py b/mindcv/models/senet.py index b35030ad4..f8f40f486 100644 --- a/mindcv/models/senet.py +++ b/mindcv/models/senet.py @@ -1,6 +1,6 @@ """ MindSpore implementation of `SENet`. -Refer to Squeeze-and-Excitation Networks. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/senet.py """ import math diff --git a/mindcv/models/shufflenetv2.py b/mindcv/models/shufflenetv2.py index bc49fc2ff..c7b5c204b 100644 --- a/mindcv/models/shufflenetv2.py +++ b/mindcv/models/shufflenetv2.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `ShuffleNetV2`. Refer to ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design +Adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/shufflenetv2.py """ from typing import Tuple diff --git a/mindcv/models/sknet.py b/mindcv/models/sknet.py index c1500c2cb..5e59d5696 100644 --- a/mindcv/models/sknet.py +++ b/mindcv/models/sknet.py @@ -1,6 +1,18 @@ -""" -MindSpore implementation of `SKNet`. -Refer to Selective Kernel Networks. +""" Selective Kernel Networks (ResNet base) +Paper: Selective Kernel Networks (https://arxiv.org/abs/1903.06586) + +This was inspired by reading 'Compounding the Performance Improvements...' (https://arxiv.org/abs/2001.06268) +and a streamlined impl at https://github.com/clovaai/assembled-cnn but I ended up building something closer +to the original paper with some modifications of my own to better balance param count vs accuracy. + +Hacked together by / Copyright 2020 Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/sknet.py + """ from typing import Dict, List, Optional, Type, Union diff --git a/mindcv/models/squeezenet.py b/mindcv/models/squeezenet.py index b70a21644..960e849eb 100644 --- a/mindcv/models/squeezenet.py +++ b/mindcv/models/squeezenet.py @@ -1,6 +1,7 @@ """ MindSpore implementation of `SqueezeNet`. 
Refer to SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size. +Adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/squeezenet.py """ import mindspore.common.initializer as init diff --git a/mindcv/models/swintransformer.py b/mindcv/models/swintransformer.py index 237f1c7ee..705c203dd 100644 --- a/mindcv/models/swintransformer.py +++ b/mindcv/models/swintransformer.py @@ -1,4 +1,26 @@ -"""Define SwinTransformer model""" +""" Swin Transformer +`Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` + - https://arxiv.org/pdf/2103.14030 + +Code/weights from https://github.com/microsoft/Swin-Transformer, original copyright/license info below + +S3 (AutoFormerV2, https://arxiv.org/abs/2111.14725) Swin weights from + - https://github.com/microsoft/Cream/tree/main/AutoFormerV2 + +Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman + +# -------------------------------------------------------- +# Swin Transformer +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ze Liu +# -------------------------------------------------------- + +MindSpore adaptation: +This file is part of a derivative work and remains under the original license. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/swin_transformer.py +""" + from typing import List, Optional, Tuple import numpy as np diff --git a/mindcv/models/swintransformerv2.py b/mindcv/models/swintransformerv2.py index 649e2f780..1500c7084 100644 --- a/mindcv/models/swintransformerv2.py +++ b/mindcv/models/swintransformerv2.py @@ -1,7 +1,23 @@ +""" Swin Transformer V2 +`Swin Transformer V2: Scaling Up Capacity and Resolution` + - https://arxiv.org/abs/2111.09883 + +Code/weights from https://github.com/microsoft/Swin-Transformer, original copyright/license info below + +Modifications and additions for timm hacked together by / Copyright 2022, Ross Wightman + +-------------------------------------------------------- +MindSpore adaptation: +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/swin_transformer_v2.py + """ -MindSpore implementation of `SwinTransformer V2`. -Refer to Swin Transformer V2: Scaling Up Capacity and Resolution. -""" +# -------------------------------------------------------- +# Swin Transformer V2 +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ze Liu +# -------------------------------------------------------- from typing import List, Optional, Tuple, Union diff --git a/mindcv/models/vgg.py b/mindcv/models/vgg.py index 8c37d8596..a92d3dbda 100644 --- a/mindcv/models/vgg.py +++ b/mindcv/models/vgg.py @@ -1,6 +1,15 @@ -""" -MindSpore implementation of `VGGNet`. -Refer to SqueezeNet: Very Deep Convolutional Networks for Large-Scale Image Recognition. +"""VGG + +Adapted from https://github.com/pytorch/vision 'vgg.py' (BSD-3-Clause) with a few changes for +timm functionality. 
+ +Copyright 2021 Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vgg.py """ import math diff --git a/mindcv/models/visformer.py b/mindcv/models/visformer.py index 1e120d403..de0eda60d 100644 --- a/mindcv/models/visformer.py +++ b/mindcv/models/visformer.py @@ -1,6 +1,16 @@ -""" -MindSpore implementation of `Visformer`. -Refer to: Visformer: The Vision-friendly Transformer +""" Visformer + +Paper: Visformer: The Vision-friendly Transformer - https://arxiv.org/abs/2104.12533 + +From original at https://github.com/danczs/Visformer + +Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/visformer.py """ from typing import List diff --git a/mindcv/models/vit.py b/mindcv/models/vit.py index c8ee0967e..edfaed051 100644 --- a/mindcv/models/vit.py +++ b/mindcv/models/vit.py @@ -1,4 +1,34 @@ -"""ViT""" +"""Vision Transformer (ViT) + +Vision Transformers as described in: + +'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale' + - https://arxiv.org/abs/2010.11929 + +`How to train your ViT? 
Data, Augmentation, and Regularization in Vision Transformers` + - https://arxiv.org/abs/2106.10270 + +`FlexiViT: One Model for All Patch Sizes` + - https://arxiv.org/abs/2212.08013 + +The official jax code is released and available at + * https://github.com/google-research/vision_transformer + * https://github.com/google-research/big_vision + +Acknowledgments: + * The paper authors for releasing code and weights, thanks! + * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch + * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT + * Bert reference code checks against Huggingface Transformers and Tensorflow Bert + +Hacked together by / Copyright 2020, Ross Wightman + +-------------------------------------------------------- +MindSpore adaptation: +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py +""" + import functools from typing import Callable, Optional diff --git a/mindcv/models/volo.py b/mindcv/models/volo.py index 536f9f2c4..67a52e17e 100644 --- a/mindcv/models/volo.py +++ b/mindcv/models/volo.py @@ -1,6 +1,31 @@ -""" -Vision OutLOoker (VOLO) implementation -Modified from timm/models/vision_transformer.py +""" Vision OutLOoker (VOLO) implementation + +Paper: `VOLO: Vision Outlooker for Visual Recognition` - https://arxiv.org/abs/2106.13112 + +Code adapted from official impl at https://github.com/sail-sg/volo, original copyright in comment below + +Modifications and additions for timm by / Copyright 2022, Ross Wightman + +# -------------------------------------------------------- +# Copyright 2021 Sea Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------- + +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/volo.py """ import numpy as np diff --git a/mindcv/models/xception.py b/mindcv/models/xception.py index 445cbcde4..6cc41c3e7 100644 --- a/mindcv/models/xception.py +++ b/mindcv/models/xception.py @@ -1,6 +1,12 @@ """ +Ported to pytorch thanks to [tstandley](https://github.com/tstandley/Xception-PyTorch) + +@author: tstandley +Adapted by cadene + +------------------------------------------------ MindSpore implementation of Xception. -Refer to Xception: Deep Learning with Depthwise Separable Convolutions. 
+Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/xception.py """ import mindspore.common.initializer as init diff --git a/mindcv/models/xcit.py b/mindcv/models/xcit.py index 2c3b6966e..f4841a6fd 100644 --- a/mindcv/models/xcit.py +++ b/mindcv/models/xcit.py @@ -1,6 +1,18 @@ -""" -MindSpore implementation of XCiT -Refer to: XCiT: Cross-Covariance Image Transformers +""" Cross-Covariance Image Transformer (XCiT) in PyTorch + +Paper: + - https://arxiv.org/abs/2106.09681 + +Same as the official implementation, with some minor adaptations, original copyright below + - https://github.com/facebookresearch/xcit/blob/master/xcit.py + +Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman + +------------------------------------------------------------------------ +MindSpore adaptation: +Modified for use with the MindSpore framework. +This file is part of a derivative work and remains under the original license. +Adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/xcit.py """ from functools import partial