diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 37c7b50f..7ff6e8f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,14 +3,29 @@ name: Continuous Integration on: [push] jobs: - ubuntu-unit-test: + mypy: + name: mypy check + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: '3.7' + - name: Install mypy + run: | + cat scripts/requirements_dev.txt | grep "mypy" | xargs pip install + - name: Check + run: | + scripts/mypy.sh --version + scripts/mypy.sh + ubuntu-unit-test: runs-on: ubuntu-18.04 strategy: matrix: mpi-vendor: ['openmpi', 'mpich'] python-version: [3.7, 3.8] - steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -33,15 +48,13 @@ jobs: pip list - name: Unit Test run: make test_torch_basic test_torch_ops test_torch_hier_ops - + macos-unit-test: - runs-on: macos-10.15 strategy: matrix: mpi-vendor: ['openmpi', 'mpich'] python-version: [3.7, 3.8] - steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -63,4 +76,4 @@ jobs: mpiexec --version pip list - name: Unit Test - run: OVERSUBSCRIBE=1 make test_torch_basic test_torch_hier_ops test_torch_ops test_torch_win_ops \ No newline at end of file + run: OVERSUBSCRIBE=1 make test_torch_basic test_torch_hier_ops test_torch_ops test_torch_win_ops diff --git a/bluefog/common/topology_util.py b/bluefog/common/topology_util.py index e77411a4..138c8acb 100644 --- a/bluefog/common/topology_util.py +++ b/bluefog/common/topology_util.py @@ -116,11 +116,11 @@ def ExponentialGraph(size: int, base: int = 2) -> nx.DiGraph: x.append(1.0) else: x.append(0.0) - x = np.array(x) - x /= x.sum() + y = np.array(x) + y /= y.sum() topo = np.empty((size, size)) for i in range(size): - topo[i] = np.roll(x, i) + topo[i] = np.roll(y, i) G = nx.from_numpy_array(topo, create_using=nx.DiGraph) return G @@ -148,11 +148,11 @@ def 
SymmetricExponentialGraph(size: int, base: int = 4) -> nx.DiGraph: x.append(1.0) else: x.append(0.0) - x = np.array(x) - x /= x.sum() + y = np.array(x) + y /= y.sum() topo = np.empty((size, size)) for i in range(size): - topo[i] = np.roll(x, i) + topo[i] = np.roll(y, i) G = nx.from_numpy_array(topo, create_using=nx.DiGraph) return G diff --git a/bluefog/common/util.py b/bluefog/common/util.py index f6c61aeb..59b84fdb 100644 --- a/bluefog/common/util.py +++ b/bluefog/common/util.py @@ -22,7 +22,7 @@ EXTENSIONS = ['tensorflow', 'torch'] def is_running_from_ipython(): - from IPython import get_ipython + from IPython import get_ipython # type: ignore return get_ipython() is not None def get_ext_suffix(): diff --git a/bluefog/run/env_util.py b/bluefog/run/env_util.py index 2b1bfced..8f8c9929 100644 --- a/bluefog/run/env_util.py +++ b/bluefog/run/env_util.py @@ -52,7 +52,7 @@ def is_open_mpi_installed(): def is_ipyparallel_installed(): try: - import ipyparallel # pylint: disable=unused-import + import ipyparallel # type: ignore # pylint: disable=unused-import return True except ImportError: return False diff --git a/bluefog/run/horovod_driver.py b/bluefog/run/horovod_driver.py index 735a8d79..8b9a5cf5 100644 --- a/bluefog/run/horovod_driver.py +++ b/bluefog/run/horovod_driver.py @@ -18,7 +18,7 @@ import os import sys -import six +import six # type: ignore from bluefog.run.horovodrun.common.util import codec, safe_shell_exec, timeout, secret from bluefog.run.horovodrun.driver import driver_service from bluefog.run.horovodrun.task import task_service diff --git a/bluefog/run/horovodrun/common/util/codec.py b/bluefog/run/horovodrun/common/util/codec.py index 4578255b..a4003f71 100644 --- a/bluefog/run/horovodrun/common/util/codec.py +++ b/bluefog/run/horovodrun/common/util/codec.py @@ -14,7 +14,7 @@ # ============================================================================== import base64 -import cloudpickle +import cloudpickle # type: ignore def loads_base64(encoded): 
diff --git a/bluefog/run/horovodrun/common/util/network.py b/bluefog/run/horovodrun/common/util/network.py index 1ad4d677..0b4a52a4 100644 --- a/bluefog/run/horovodrun/common/util/network.py +++ b/bluefog/run/horovodrun/common/util/network.py @@ -17,10 +17,10 @@ import socket import struct import threading -import cloudpickle -import psutil +import cloudpickle # type: ignore +import psutil # type: ignore -from six.moves import queue, socketserver +from six.moves import queue, socketserver # type: ignore from bluefog.run.horovodrun.common.util import secret diff --git a/bluefog/run/horovodrun/common/util/safe_shell_exec.py b/bluefog/run/horovodrun/common/util/safe_shell_exec.py index 9ae2d2c5..1216e509 100644 --- a/bluefog/run/horovodrun/common/util/safe_shell_exec.py +++ b/bluefog/run/horovodrun/common/util/safe_shell_exec.py @@ -14,7 +14,7 @@ # ============================================================================== import os -import psutil +import psutil # type: ignore import signal import subprocess import sys diff --git a/bluefog/run/horovodrun/util/cache.py b/bluefog/run/horovodrun/util/cache.py index dc83661f..5f81f303 100644 --- a/bluefog/run/horovodrun/util/cache.py +++ b/bluefog/run/horovodrun/util/cache.py @@ -17,7 +17,7 @@ import errno import os import threading -import cloudpickle +import cloudpickle # type: ignore class Cache(object): diff --git a/bluefog/run/horovodrun/util/network.py b/bluefog/run/horovodrun/util/network.py index fbf2e5d9..d41b9958 100644 --- a/bluefog/run/horovodrun/util/network.py +++ b/bluefog/run/horovodrun/util/network.py @@ -1,5 +1,5 @@ import socket -import psutil +import psutil # type: ignore from bluefog.run.horovodrun.util import threads diff --git a/bluefog/run/horovodrun/util/threads.py b/bluefog/run/horovodrun/util/threads.py index 78c10b36..5ba1bedd 100644 --- a/bluefog/run/horovodrun/util/threads.py +++ b/bluefog/run/horovodrun/util/threads.py @@ -15,7 +15,7 @@ import threading -from six.moves import queue +from 
six.moves import queue # type: ignore def execute_function_multithreaded(fn, diff --git a/bluefog/run/interactive_run.py b/bluefog/run/interactive_run.py index f4273409..1a8f5335 100644 --- a/bluefog/run/interactive_run.py +++ b/bluefog/run/interactive_run.py @@ -22,7 +22,7 @@ import time from typing import Dict, List -import ipyparallel as ipp +import ipyparallel as ipp # type: ignore import bluefog from bluefog.run import env_util, network_util, horovod_driver @@ -318,7 +318,7 @@ def multiple_machines_launch(args, env: Dict[str, str], common_intfs = horovod_driver.driver_fn(all_host_names, local_host_names, args.ssh_port, args.verbose) else: - common_intfs = [args.nic] + common_intfs = {args.nic} tcp_intf_arg = '-mca btl_tcp_if_include {common_intfs}'.format( common_intfs=','.join(common_intfs)) if common_intfs else '' @@ -384,8 +384,6 @@ def multiple_machines_launch(args, env: Dict[str, str], ib_arg=ib_arg, nccl_socket_intf_arg=nccl_socket_intf_arg, extra_flags=extra_flags, - env=' '.join('-x %s' % key for key in env.keys() - if env_util.is_exportable(key)), command=ipengine_command) ) p_engine = subprocess.Popen(mpi_ipengine_command, shell=True, env=env) diff --git a/bluefog/run/network_util.py b/bluefog/run/network_util.py index bcd3fbcb..46a7da65 100644 --- a/bluefog/run/network_util.py +++ b/bluefog/run/network_util.py @@ -20,7 +20,7 @@ import re import socket -import psutil +import psutil # type: ignore from bluefog.run.horovodrun.common.util import safe_shell_exec # Number of retries for sshing into the hosts diff --git a/bluefog/run/run.py b/bluefog/run/run.py index c2bc80cc..2364ca1b 100644 --- a/bluefog/run/run.py +++ b/bluefog/run/run.py @@ -16,14 +16,8 @@ import argparse import os -import re import shlex -import socket -import subprocess -import sys -import traceback -import psutil import bluefog from bluefog.run import env_util, network_util, horovod_driver diff --git a/bluefog/tensorflow/__init__.py b/bluefog/tensorflow/__init__.py index
072ecd8f..d9c6b8b9 100644 --- a/bluefog/tensorflow/__init__.py +++ b/bluefog/tensorflow/__init__.py @@ -4,7 +4,7 @@ import collections import os -import tensorflow as tf +import tensorflow as tf # type: ignore from bluefog.common.util import check_extension check_extension('bluefog.tensorflow', __file__, 'mpi_lib') diff --git a/bluefog/tensorflow/mpi_ops.py b/bluefog/tensorflow/mpi_ops.py index 263d3256..98d31081 100644 --- a/bluefog/tensorflow/mpi_ops.py +++ b/bluefog/tensorflow/mpi_ops.py @@ -19,10 +19,10 @@ from __future__ import print_function import re -import tensorflow as tf -from tensorflow.python.framework import load_library -from tensorflow.python.framework import ops -from tensorflow.python.platform import resource_loader +import tensorflow as tf # type: ignore +from tensorflow.python.framework import load_library # type: ignore +from tensorflow.python.framework import ops # type: ignore +from tensorflow.python.platform import resource_loader # type: ignore from bluefog.common.basics import BlueFogBasics from bluefog.common.util import get_ext_suffix diff --git a/bluefog/tensorflow/optimizers.py b/bluefog/tensorflow/optimizers.py index fbd155ad..10e7e502 100644 --- a/bluefog/tensorflow/optimizers.py +++ b/bluefog/tensorflow/optimizers.py @@ -14,7 +14,7 @@ # limitations under the License. 
# ============================================================================== -import tensorflow as tf +import tensorflow as tf # type: ignore from bluefog.tensorflow.mpi_ops import allreduce, broadcast, size from bluefog.tensorflow.util import _executing_eagerly, _cache diff --git a/bluefog/tensorflow/util.py b/bluefog/tensorflow/util.py index e3b8f31c..1de817e5 100644 --- a/bluefog/tensorflow/util.py +++ b/bluefog/tensorflow/util.py @@ -16,11 +16,11 @@ from distutils.version import LooseVersion -import tensorflow +import tensorflow # type: ignore # Eager Mode has been introduced in TF 1.7.0 if LooseVersion(tensorflow.__version__) >= LooseVersion('1.7.0'): - from tensorflow.python.eager import context + from tensorflow.python.eager import context # type: ignore _has_eager = True else: _has_eager = False diff --git a/bluefog/torch/optimizers.py b/bluefog/torch/optimizers.py index 86b3e721..65bf0676 100644 --- a/bluefog/torch/optimizers.py +++ b/bluefog/torch/optimizers.py @@ -23,6 +23,7 @@ import warnings import torch +from torch.optim import Optimizer import bluefog.torch as bf class CommunicationType(Enum): @@ -163,7 +164,7 @@ def _timeline_forward_end_hook(module, *unused): *pre_forward_hook_handles, *forward_end_hook_handles] -class _DistributedOptimizer(torch.optim.Optimizer): +class _DistributedOptimizer(Optimizer): def __init__(self, params, model, backward_passes_per_step=1): super(self.__class__, self).__init__(params) @@ -294,7 +295,7 @@ def zero_grad(self): return super(self.__class__, self).zero_grad() -class _DistributedReduceOptimizer(torch.optim.Optimizer): +class _DistributedReduceOptimizer(Optimizer): """ A distributed optimizer wrapper over torch optimizer. 
Arguments: @@ -482,7 +483,7 @@ def step(self, closure=None): return super(self.__class__, self).step(closure) -class _DistributedAdaptThenCombineOptimizer(torch.optim.Optimizer): +class _DistributedAdaptThenCombineOptimizer(Optimizer): def __init__(self, params, model, communication_type, backward_passes_per_step=1): super(self.__class__, self).__init__(params) @@ -841,7 +842,7 @@ def zero_grad(self): return super(self.__class__, self).zero_grad() -class _DistributedWinOptimizer(torch.optim.Optimizer): +class _DistributedWinOptimizer(Optimizer): def __init__(self, params, model, num_steps_per_communication, window_prefix, pull_style): super(self.__class__, self).__init__(params) @@ -1023,7 +1024,7 @@ def step(self, closure=None): return super(self.__class__, self).step(closure) -class _DistributedPushSumOptimizer(torch.optim.Optimizer): +class _DistributedPushSumOptimizer(Optimizer): def __init__(self, params, model, num_steps_per_communication): super(self.__class__, self).__init__(params) diff --git a/bluefog/torch/topology_util.py b/bluefog/torch/topology_util.py index b0fe38f2..0f5378d6 100644 --- a/bluefog/torch/topology_util.py +++ b/bluefog/torch/topology_util.py @@ -6,7 +6,7 @@ import bluefog.torch as bf -def _check_ranks(rank_list: List[Any], self_rank: int, size: int) -> [bool, str]: +def _check_ranks(rank_list: List[Any], self_rank: int, size: int) -> Tuple[bool, str]: for rank in rank_list: if not isinstance(rank, int): return False, "contain element that is not integer." @@ -21,7 +21,7 @@ def _check_ranks(rank_list: List[Any], self_rank: int, size: int) -> [bool, str] def InferSourceFromDestinationRanks( dst_ranks: List[int], construct_adjacency_matrix: bool = False, -) -> Union[List[int], Tuple[List[int], np.array]]: +) -> Union[List[int], Tuple[List[int], np.ndarray]]: """Infer the source ranks from destination ranks. This is collective communication call. 
Args: @@ -49,7 +49,7 @@ def InferSourceFromDestinationRanks( def InferDestinationFromSourceRanks( src_ranks: List[int], construct_adjacency_matrix: bool = False, -) -> Union[List[int], np.array]: +) -> Union[List[int], np.ndarray]: """Infer the destination ranks from source ranks. This is collective communication call. Args: diff --git a/examples/pytorch_benchmark.py b/examples/pytorch_benchmark.py index c900797d..a68e2101 100644 --- a/examples/pytorch_benchmark.py +++ b/examples/pytorch_benchmark.py @@ -23,7 +23,7 @@ import torch.nn.functional as F import torch.optim as optim import torch.utils.data.distributed -from torchvision import models +from torchvision import models # type: ignore import bluefog.torch as bf from bluefog.common import topology_util @@ -66,7 +66,7 @@ args.cuda = not args.no_cuda and torch.cuda.is_available() if args.dist_optimizer == 'horovod': - import horovod.torch as bf + import horovod.torch as bf # type: ignore bf.init() diff --git a/examples/pytorch_mnist.py b/examples/pytorch_mnist.py index 57ed59d8..163e3476 100644 --- a/examples/pytorch_mnist.py +++ b/examples/pytorch_mnist.py @@ -17,6 +17,7 @@ from __future__ import print_function from bluefog.common import topology_util +from typing import Tuple, List import bluefog.torch as bf import argparse import os @@ -28,7 +29,7 @@ import torch.nn.functional as F import torch.optim as optim import torch.utils.data.distributed -from torchvision import datasets, transforms +from torchvision import datasets, transforms # type: ignore sys.path.insert(0, os.path.abspath( os.path.join(os.path.dirname(__file__), ".."))) @@ -75,7 +76,7 @@ if args.dist_optimizer == 'horovod': print("importing horovod") - import horovod.torch as bf + import horovod.torch as bf # type: ignore bf.init() @@ -304,7 +305,7 @@ def test(record): ) record.append((test_loss, 100.0 * test_accuracy)) -test_record = [] +test_record: List[Tuple[float, float]] = [] for epoch in range(1, args.epochs + 1): train(epoch) 
test(test_record) diff --git a/examples/pytorch_resnet.py b/examples/pytorch_resnet.py index ae5c73d9..2b7fbf4d 100644 --- a/examples/pytorch_resnet.py +++ b/examples/pytorch_resnet.py @@ -30,9 +30,9 @@ import torch.nn.functional as F import torch.optim as optim import torch.utils.data.distributed -from torchvision import datasets, transforms, models -import tensorboardX -from tqdm import tqdm +from torchvision import datasets, transforms, models # type: ignore +import tensorboardX # type: ignore +from tqdm import tqdm # type: ignore sys.path.insert(0, os.path.abspath( os.path.join(os.path.dirname(__file__), ".."))) @@ -96,7 +96,7 @@ if args.dist_optimizer == 'horovod': print("importing horovod") - import horovod.torch as bf + import horovod.torch as bf # type: ignore # Bluefog: initialize library. bf.init() diff --git a/scripts/.mypy.ini b/scripts/.mypy.ini new file mode 100644 index 00000000..1fed97e1 --- /dev/null +++ b/scripts/.mypy.ini @@ -0,0 +1,5 @@ +[mypy] + +[mypy-matplotlib.*,networkx.*,pytest.*,numpy.*,tensorflow.*,psutil.*,torch.*] +follow_imports = silent +ignore_missing_imports = true diff --git a/scripts/mypy.sh b/scripts/mypy.sh new file mode 100755 index 00000000..0039ba79 --- /dev/null +++ b/scripts/mypy.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +# Usage: (under the root directory) +# scripts/mypy.sh [--version] + +mypy_files=$(find bluefog examples test -name "*.py") +mypy --config-file=./scripts/.mypy.ini "$@" ${mypy_files} diff --git a/scripts/requirements_dev.txt b/scripts/requirements_dev.txt new file mode 100644 index 00000000..5cf5d8d8 --- /dev/null +++ b/scripts/requirements_dev.txt @@ -0,0 +1 @@ +mypy==0.940 \ No newline at end of file diff --git a/test/tensorflow_basics_test.py b/test/tensorflow_basics_test.py index d867cfd0..8bbacd84 100644 --- a/test/tensorflow_basics_test.py +++ b/test/tensorflow_basics_test.py @@ -22,7 +22,7 @@ import numpy as np import networkx as nx import pytest -import tensorflow as tf +import tensorflow as tf #
type: ignore from common import mpi_env_rank_and_size import bluefog.tensorflow as bf diff --git a/test/tensorflow_ops_test.py b/test/tensorflow_ops_test.py index 19ae2341..e0a26756 100644 --- a/test/tensorflow_ops_test.py +++ b/test/tensorflow_ops_test.py @@ -20,9 +20,9 @@ import itertools import numpy as np import os -import tensorflow as tf +import tensorflow as tf # type: ignore from bluefog.tensorflow.util import _executing_eagerly, _has_eager -from tensorflow.python.framework import ops +from tensorflow.python.framework import ops # type: ignore import warnings import bluefog.tensorflow as bf