Upgrade formatter version to support higher versions of Python (#2957)
* Upgrade formatter version to support higher versions of Python

* Fix formatting issues

* Fix unit tests

* Disable non-working tests
YuanTingHsieh authored Sep 25, 2024
1 parent 56558c1 commit 3b26109
Showing 76 changed files with 138 additions and 127 deletions.
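Most of the 138 additions and 127 deletions below are mechanical restyling rather than behavior changes. The dominant pattern is the upgraded formatter adding spaces around binary operators inside f-string replacement fields; here is a minimal before/after sketch, assuming the formatter is a recent release of black (this page does not show which tool or version the repository pins):

client_id, running_loss = 0, 1.234  # sample values so the sketch runs on its own

# Old style, before the upgrade: operators inside f-string fields are unspaced
print(f"site-{client_id+1}: loss={running_loss/3000:0.3f}")

# New style, after the upgrade: the same expressions gain spaces around operators
print(f"site-{client_id + 1}: loss={running_loss / 3000:0.3f}")

The remaining hunks follow the same pattern, plus quote normalization (single to double quotes) and import regrouping.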
@@ -90,7 +90,7 @@ def main():
proportions = n_clients * [1 / n_clients]

for client_id in range(n_clients):
- client_name = f"site-{client_id+1}"
+ client_name = f"site-{client_id + 1}"
client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed + client_id)

if do_break_chains:
@@ -132,8 +132,8 @@ def main():
n_pos = np.sum(_df["Y"] == 0)
n_neg = np.sum(_df["Y"] == 1)
n = len(_df)
- print(f" {_set} Pos/Neg ratio: neg={n_neg}, pos={n_pos}: {n_pos/n_neg:0.3f}")
- print(f" {_set} Trivial accuracy: {n_pos/n:0.3f}")
+ print(f" {_set} Pos/Neg ratio: neg={n_neg}, pos={n_pos}: {n_pos / n_neg:0.3f}")
+ print(f" {_set} Trivial accuracy: {n_pos / n:0.3f}")

# measure overlap
d = np.nan * np.zeros((n_clients, n_clients))
@@ -149,7 +149,7 @@ def main():

print(d)
overlap = np.mean(d[~np.isnan(d)])
- print(f"Avg. overlap: {100*overlap:0.2f}%")
+ print(f"Avg. overlap: {100 * overlap:0.2f}%")


if __name__ == "__main__":
4 changes: 2 additions & 2 deletions examples/advanced/bionemo/downstream/tap/prepare_tap_data.py
@@ -125,7 +125,7 @@ def main():
proportions = n_clients * [1 / n_clients]

for client_id in range(n_clients):
- client_name = f"site-{client_id+1}"
+ client_name = f"site-{client_id + 1}"
client_train_df = train_df.sample(frac=proportions[client_id], replace=False, random_state=seed + client_id)

if do_break_chains:
@@ -177,7 +177,7 @@ def main():

print(d)
overlap = np.mean(d[~np.isnan(d)])
- print(f"Avg. overlap: {100*overlap:0.2f}%")
+ print(f"Avg. overlap: {100 * overlap:0.2f}%")


if __name__ == "__main__":
4 changes: 2 additions & 2 deletions examples/advanced/bionemo/task_fitting/split_data.py
@@ -39,7 +39,7 @@ def get_site_class_summary(train_labels, site_idx):
for site, data_idx in site_idx.items():
unq, unq_cnt = np.unique(train_labels[data_idx], return_counts=True)
tmp = {unq[i]: int(unq_cnt[i]) for i in range(len(unq))}
- class_sum[f"site-{site+1}"] = tmp
+ class_sum[f"site-{site + 1}"] = tmp
return class_sum


@@ -106,7 +106,7 @@ def split(proteins, num_sites, split_dir=".", alpha=1.0, seed=0, concat=False):
# write split data
train_proteins = np.asarray(train_proteins)
for site in range(num_sites):
- client_name = f"site-{site+1}"
+ client_name = f"site-{site + 1}"

train_indices = site_idx[site]
split_train_proteins = train_proteins[train_indices]
@@ -44,7 +44,7 @@
job.to(ctrl, "server")

for i in range(n_clients):
- site_name = f"site-{i+1}"
+ site_name = f"site-{i + 1}"
learner_id = job.to(
PTLearner(epochs=5, lr=0.01, analytic_sender_id="log_writer"),
site_name,
@@ -168,7 +168,7 @@ def local_train(self, fl_ctx, abort_signal):
running_loss += cost.cpu().detach().numpy() / images.size()[0]
if i % 3000 == 0:
self.log_info(
- fl_ctx, f"Epoch: {epoch}/{self.epochs}, Iteration: {i}, " f"Loss: {running_loss/3000}"
+ fl_ctx, f"Epoch: {epoch}/{self.epochs}, Iteration: {i}, " f"Loss: {running_loss / 3000}"
)
running_loss = 0.0
self.writer.log_text(
@@ -171,7 +171,7 @@ def local_train(self, fl_ctx, abort_signal):
running_loss += cost.cpu().detach().numpy() / images.size()[0]
if i % 3000 == 0:
self.log_info(
- fl_ctx, f"Epoch: {epoch}/{self.epochs}, Iteration: {i}, " f"Loss: {running_loss/3000}"
+ fl_ctx, f"Epoch: {epoch}/{self.epochs}, Iteration: {i}, " f"Loss: {running_loss / 3000}"
)
running_loss = 0.0

@@ -162,7 +162,7 @@ def local_train(self, fl_ctx, abort_signal):
running_loss += cost.cpu().detach().numpy() / images.size()[0]
if i % 3000 == 0:
self.log_info(
- fl_ctx, f"Epoch: {epoch}/{self.epochs}, Iteration: {i}, " f"Loss: {running_loss/3000}"
+ fl_ctx, f"Epoch: {epoch}/{self.epochs}, Iteration: {i}, " f"Loss: {running_loss / 3000}"
)
running_loss = 0.0

2 changes: 1 addition & 1 deletion examples/advanced/finance-end-to-end/enrich.py
@@ -31,7 +31,7 @@ def main():
output_dir = args.output_dir

site_name = flare.get_site_name()
- print(f"\n {site_name =} \n ")
+ print(f"\n {site_name=} \n ")

# receives global message from NVFlare
etl_task = flare.receive()
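The enrich.py hunk above is easy to misread: the trailing = in {site_name=} is Python 3.8+ f-string debug syntax, which prints the expression text together with its value, and whitespace around the = is preserved in the output, so the formatter's normalization also tightens the printed text. A small runnable illustration:

site_name = "site-1"
print(f"{site_name=}")    # prints: site_name='site-1'
print(f"{site_name = }")  # prints: site_name = 'site-1' (spaces are kept in the output)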
4 changes: 2 additions & 2 deletions examples/advanced/finance/utils/prepare_data_vertical.py
@@ -106,8 +106,8 @@ def main():
]
)
df_split = df_split.sample(frac=1)
- print(f"site-{site+1} split rows [{row_start}:{row_end}],[{rows_total - rows_overlap}:{rows_total}]")
- print(f"site-{site+1} split cols [{col_start}:{col_end}]")
+ print(f"site-{site + 1} split rows [{row_start}:{row_end}],[{rows_total - rows_overlap}:{rows_total}]")
+ print(f"site-{site + 1} split cols [{col_start}:{col_end}]")

data_path = os.path.join(args.out_path, f"site-{site + 1}")
if not os.path.exists(data_path):
2 changes: 1 addition & 1 deletion examples/advanced/gnn/code/graphsage_protein_fl.py
@@ -148,7 +148,7 @@ def main():
# add record
running_loss += float(loss.item()) * link_pred.numel()
instance_count += link_pred.numel()
- print(f"Epoch: {epoch:02d}, Loss: {running_loss/instance_count:.4f}")
+ print(f"Epoch: {epoch:02d}, Loss: {running_loss / instance_count:.4f}")
# (optional) add loss to tensorboard
writer.add_scalar(
"train_loss", running_loss / instance_count, input_model.current_round * args.epochs + epoch
2 changes: 1 addition & 1 deletion examples/advanced/job_api/pt/cse_script_runner_cifar10.py
@@ -66,7 +66,7 @@
script=train_script,
script_args="",
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0")
@@ -48,7 +48,7 @@
job.to(data_splitter, "server")

for i in range(n_clients):
- site_name = f"site-{i+1}"
+ site_name = f"site-{i + 1}"
learner_id = job.to(
CIFAR10ModelLearner(train_idx_root=train_split_root, aggregation_epochs=aggregation_epochs, lr=0.01),
site_name,
@@ -147,7 +147,7 @@ def split_higgs(input_data_path, input_header_path, output_dir, site_num, sample
script_args=f"--data_root_dir {data_output_dir}",
framework=FrameworkType.RAW, # kmeans requires raw values only rather than PyTorch Tensors (the default)
)
- job.to(executor, f"site-{i+1}") # HIGGs data splitter assumes site names start from 1
+ job.to(executor, f"site-{i + 1}") # HIGGs data splitter assumes site names start from 1

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir")
2 changes: 1 addition & 1 deletion examples/advanced/job_api/tf/src/cifar10_data_split.py
@@ -73,7 +73,7 @@ def cifar10_split(split_dir: str = None, num_sites: int = 8, alpha: float = 0.5,
site_file_path = os.path.join(split_dir, "site-")
for site in range(num_sites):
site_file_name = site_file_path + str(site + 1) + ".npy"
- print(f"Save split index {site+1} of {num_sites} to {site_file_name}")
+ print(f"Save split index {site + 1} of {num_sites} to {site_file_name}")
np.save(site_file_name, np.array(site_idx[site]))
train_idx_paths.append(site_file_name)

@@ -162,7 +162,7 @@
executor = ScriptRunner(
script=train_script, script_args=curr_task_script_args, framework=FrameworkType.TENSORFLOW
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# Can export current job to folder.
# job.export_job(f"{args.workspace}/nvflare/jobs/job_config")
2 changes: 1 addition & 1 deletion examples/advanced/llm_hf/utils/preprocess_dolly.py
@@ -67,7 +67,7 @@ def split_to_jsonl(data, output_dir, validation_ratio, testing_ratio):
g.write(json.dumps({"input": input, "output": output}) + "\n")
else:
i.write(json.dumps({"input": input, "output": output}) + "\n")
- print(f"{index+1} out of {data_ct} Data was successfully preprocessed and saved.")
+ print(f"{index + 1} out of {data_ct} Data was successfully preprocessed and saved.")


def main():
4 changes: 2 additions & 2 deletions examples/advanced/vertical_xgboost/utils/prepare_data.py
@@ -96,8 +96,8 @@ def main():
]
)
df_split = df_split.sample(frac=1)
- print(f"site-{site+1} split rows [{row_start}:{row_end}],[{rows_total - rows_overlap}:{rows_total}]")
- print(f"site-{site+1} split cols [{col_start}:{col_end}]")
+ print(f"site-{site + 1} split rows [{row_start}:{row_end}],[{rows_total - rows_overlap}:{rows_total}]")
+ print(f"site-{site + 1} split cols [{col_start}:{col_end}]")

data_path = os.path.join(args.out_path, f"site-{site + 1}")
if not os.path.exists(data_path):
@@ -79,7 +79,7 @@ def main():
row_end = sum(site_row_size[: site + 1])

df_split = df_train.iloc[row_start:row_end, :]
- print(f"site-{site+1} split rows [{row_start}:{row_end}]")
+ print(f"site-{site + 1} split rows [{row_start}:{row_end}]")

data_path = os.path.join(args.out_path, f"site-{site + 1}")
if not os.path.exists(data_path):
@@ -71,7 +71,7 @@ def main():
col_end = sum(site_col_size[: site + 1])

df_split = df.iloc[:, col_start:col_end]
- print(f"site-{site+1} split cols [{col_start}:{col_end}]")
+ print(f"site-{site + 1} split cols [{col_start}:{col_end}]")

data_path = os.path.join(args.out_path, f"site-{site + 1}")
if not os.path.exists(data_path):
@@ -147,7 +147,7 @@ def split_higgs(input_data_path, input_header_path, output_dir, site_num, sample
script_args=f"--data_root_dir {data_output_dir}",
framework=FrameworkType.RAW, # kmeans requires raw values only rather than PyTorch Tensors (the default)
)
- job.to(executor, f"site-{i+1}") # HIGGs data splitter assumes site names start from 1
+ job.to(executor, f"site-{i + 1}") # HIGGs data splitter assumes site names start from 1

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir")
2 changes: 1 addition & 1 deletion examples/getting_started/tf/src/cifar10_data_split.py
@@ -73,7 +73,7 @@ def cifar10_split(split_dir: str = None, num_sites: int = 8, alpha: float = 0.5,
site_file_path = os.path.join(split_dir, "site-")
for site in range(num_sites):
site_file_name = site_file_path + str(site + 1) + ".npy"
- print(f"Save split index {site+1} of {num_sites} to {site_file_name}")
+ print(f"Save split index {site + 1} of {num_sites} to {site_file_name}")
np.save(site_file_name, np.array(site_idx[site]))
train_idx_paths.append(site_file_name)

2 changes: 1 addition & 1 deletion examples/getting_started/tf/tf_fl_script_runner_cifar10.py
@@ -162,7 +162,7 @@
executor = ScriptRunner(
script=train_script, script_args=curr_task_script_args, framework=FrameworkType.TENSORFLOW
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# Can export current job to folder.
# job.export_job(f"{args.workspace}/nvflare/jobs/job_config")
2 changes: 1 addition & 1 deletion examples/hello-world/hello-cyclic/cyclic_script_runner.py
@@ -43,7 +43,7 @@
script_args="", # f"--batch_size 32 --data_path /tmp/data/site-{i}"
framework=FrameworkType.TENSORFLOW,
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0")
@@ -40,7 +40,7 @@
# Add clients
for i in range(n_clients):
executor = ScriptRunner(script=train_script, script_args="", framework=FrameworkType.NUMPY)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0")
2 changes: 1 addition & 1 deletion examples/hello-world/hello-pt/fedavg_script_runner_pt.py
@@ -31,7 +31,7 @@
executor = ScriptRunner(
script=train_script, script_args="" # f"--batch_size 32 --data_path /tmp/data/site-{i}"
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0")
2 changes: 1 addition & 1 deletion examples/hello-world/hello-pt/src/hello-pt_cifar10_fl.py
@@ -74,7 +74,7 @@ def main():

running_loss += cost.cpu().detach().numpy() / images.size()[0]
if i % 3000 == 0:
- print(f"Epoch: {epoch}/{epochs}, Iteration: {i}, Loss: {running_loss/3000}")
+ print(f"Epoch: {epoch}/{epochs}, Iteration: {i}, Loss: {running_loss / 3000}")
global_step = input_model.current_round * steps + epoch * len(train_loader) + i
summary_writer.add_scalar(tag="loss_for_each_batch", scalar=running_loss, global_step=global_step)
running_loss = 0.0
2 changes: 1 addition & 1 deletion examples/hello-world/hello-tf/fedavg_script_runner_tf.py
@@ -31,7 +31,7 @@
script_args="", # f"--batch_size 32 --data_path /tmp/data/site-{i}"
framework=FrameworkType.TENSORFLOW,
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

# job.export_job("/tmp/nvflare/jobs/job_config")
job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0")
2 changes: 1 addition & 1 deletion examples/hello-world/ml-to-fl/pt/pt_client_api_job.py
@@ -63,7 +63,7 @@ def main():
command=launch_command.replace("{PORT}", ports[i]),
framework=FrameworkType.PYTORCH,
)
- job.to(executor, f"site-{i+1}")
+ job.to(executor, f"site-{i + 1}")

if export_config:
job.export_job("/tmp/nvflare/jobs/job_config")
@@ -13,15 +13,13 @@
# limitations under the License.

import torch.multiprocessing as mp
- from omegaconf.omegaconf import OmegaConf

from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel
from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder
from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP

from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.exp_manager import exp_manager
+ from omegaconf.omegaconf import OmegaConf

mp.set_start_method("spawn", force=True)

@@ -57,7 +55,7 @@
@hydra_runner(config_path="../custom", config_name="megatron_gpt_peft_tuning_config")
def main(cfg) -> None:
logging.info("\n\n************** Experiment configuration ***********")
- logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+ logging.info(f"\n{OmegaConf.to_yaml(cfg)}")

trainer = MegatronLMPPTrainerBuilder(cfg).create_trainer()
exp_manager(trainer, cfg.exp_manager)
@@ -87,16 +85,16 @@ def main(cfg) -> None:
# (optional): get the FL system info
fl_sys_info = flare.system_info()
print("--- fl_sys_info ---")
- print(fl_sys_info)
+ print(fl_sys_info)

# (3) evaluate the current global model to allow server-side model selection.
print("--- validate global model ---")
trainer.validate(model)

# (4) Perform local training starting with the received global model.
- print("--- train new model ---")
+ print("--- train new model ---")
trainer.fit(model)


- if __name__ == '__main__':
+ if __name__ == "__main__":
main()
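The import moves in this file and in peft_model.py below are consistent with an import sorter such as isort (an assumption; the commit does not name its tools): third-party from-imports are alphabetized into a single group, which is why the OmegaConf import drops from the top of the block to the bottom. Reassembled from this file's first hunk, the resulting layout is:

import torch.multiprocessing as mp

from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel
from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder
from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP
from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.exp_manager import exp_manager
from omegaconf.omegaconf import OmegaConf

mp.set_start_method("spawn", force=True)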
5 changes: 3 additions & 2 deletions integration/nemo/examples/peft/nemo_nvflare/peft_model.py
@@ -15,6 +15,7 @@

import logging
import os
+
import torch

from nvflare.apis.event_type import EventType
@@ -55,8 +56,8 @@ def _initialize(self, fl_ctx: FLContext):
from nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model import MegatronGPTSFTModel
from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronLMPPTrainerBuilder
from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP
- from omegaconf import OmegaConf
+ from omegaconf import OmegaConf

# get app root
app_root = fl_ctx.get_prop(FLContextKey.APP_ROOT)

1 change: 0 additions & 1 deletion integration/nemo/examples/peft/nemo_nvflare/utils.py
@@ -31,4 +31,3 @@ def convert_global_to_ckpt(global_model_filepath: str, ckpt_path: str):
torch.save({"state_dict": global_weights}, ckpt_path)

print(f"Saved NeMo ckpt with {len(global_weights)} entries to {ckpt_path}")
-