diff --git a/.gitignore b/.gitignore index c80bad2c74..09d7e01de8 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,7 @@ venv/ ENV/ env.bak/ venv.bak/ +.mise.toml # Spyder project settings .spyderproject diff --git a/examples/advanced/README.md b/examples/advanced/README.md index 96c0acd45a..0465ce6e92 100644 --- a/examples/advanced/README.md +++ b/examples/advanced/README.md @@ -44,6 +44,10 @@ Please also install "./requirements.txt" in each example folder. * [Swarm Learning](./swarm_learning/README.md) * Example of swarm learning with NVIDIA FLARE using PyTorch with the CIFAR-10 dataset. +## Distributed Optimization / P2P algorithms +* [Distributed Optimization](./distributed_optimization/README.md) + * Example of using the low-level NVFlare APIs to implement and run P2P distributed optimization algorithms. + ## Vertical Federated Learning * [Vertical Federated Learning](./vertical_federated_learning/README.md) * Example of running split learning using the CIFAR-10 dataset. diff --git a/examples/advanced/distributed_optimization/.gitignore b/examples/advanced/distributed_optimization/.gitignore new file mode 100644 index 0000000000..87c90aa325 --- /dev/null +++ b/examples/advanced/distributed_optimization/.gitignore @@ -0,0 +1,2 @@ +tmp +data \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/1-consensus/README.md b/examples/advanced/distributed_optimization/1-consensus/README.md new file mode 100644 index 0000000000..1ae35540f2 --- /dev/null +++ b/examples/advanced/distributed_optimization/1-consensus/README.md @@ -0,0 +1,4 @@ +# Consensus algorithm +In this example we show how to run the consensus algorithm. You can find a detailed walkthrough in the [tutorial](tutorial.ipynb) or you can just run the provided [script](launcher.py) via `python launcher.py`. 
+ +![dgd](consensus.png) diff --git a/examples/advanced/distributed_optimization/1-consensus/consensus.png b/examples/advanced/distributed_optimization/1-consensus/consensus.png new file mode 100644 index 0000000000..a8bac4f575 Binary files /dev/null and b/examples/advanced/distributed_optimization/1-consensus/consensus.png differ diff --git a/examples/advanced/distributed_optimization/1-consensus/launcher.py b/examples/advanced/distributed_optimization/1-consensus/launcher.py new file mode 100644 index 0000000000..4c7190ad3e --- /dev/null +++ b/examples/advanced/distributed_optimization/1-consensus/launcher.py @@ -0,0 +1,58 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +import random +import matplotlib.pyplot as plt +from nvflare.job_config.api import FedJob + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import ConsensusExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network + + +if __name__ == "__main__": + # Create job + job = FedJob(name="consensus") + + # generate random config + num_clients = 6 + network, _ = generate_random_network(num_clients=num_clients) + config = Config(network=network, extra={"iterations": 50}) + + # send controller to server + controller = DistOptController(config=config) + job.to_server(controller) + + # Add clients + for i in range(num_clients): + executor = ConsensusExecutor(random.randint(0, 10)) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run("./tmp/runs/consensus") + + history = { + f"site-{i + 1}": torch.load( + f"tmp/runs/consensus/site-{i + 1}/value_sequence.pt" + ) + for i in range(num_clients) + } + plt.figure() + for i in range(num_clients): + plt.plot(history[f"site-{i + 1}"], label=f"site-{i + 1}") + plt.legend() + plt.title("Evolution of local values") + plt.show() diff --git a/examples/advanced/distributed_optimization/1-consensus/tutorial.ipynb b/examples/advanced/distributed_optimization/1-consensus/tutorial.ipynb new file mode 100644 index 0000000000..1085f5da7f --- /dev/null +++ b/examples/advanced/distributed_optimization/1-consensus/tutorial.ipynb @@ -0,0 +1,760 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Consensus algorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we'll show how to run a simple consensus algorithm.\n", + "\n", + "## Background\n", + "\n", + "We consider a network of `num_clients` peer clients. 
Each client $i$ maintains some local value $x_i^t$ and is connected to a subset of the other clients in the network, defined as the set of its neighbors $\\mathcal{N}_i$.\n", + "\n", + "The goal of all the clients is to reach consensus on their local values by communicating with their peers for a certain number of iterations $T$. \n", + "In other terms, they want to cooperatively compute a weighted average of their initial values $x_i^0$.\n", + "\n", + "The consensus algorithm works by having each client updating its local value as\n", + "\n", + "$$x_i^{t+1} = \\sum_{j\\in\\mathcal{N}_i}a_{ji}x_j^t$$\n", + "\n", + "where $a_{ji}$ is the weight associated by client $i$ to client $j$ and $\\sum_{j=1}^N a_{ji}=1$ for all $i$." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "Let's assume we have $N=5$ clients and generate a random configuration." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from nvflare.app_opt.p2p.utils.config_generator import generate_random_network\n", + "from nvflare.app_opt.p2p.types import Config\n", + "\n", + "# generate random config\n", + "num_clients=5\n", + "network, adjacency_matrix = generate_random_network(num_clients=num_clients)\n", + "config = Config(network=network, extra={\"iterations\": 50})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By inspecting the config we can see the initial values of each client as well as the network topology." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Config(\n",
+       "    network=Network(\n",
+       "        nodes=[\n",
+       "            Node(\n",
+       "                id='site-1',\n",
+       "                neighbors=[\n",
+       "                    Neighbor(id='site-2', weight=0.3333333432674408),\n",
+       "                    Neighbor(id='site-4', weight=0.3333333432674408)\n",
+       "                ]\n",
+       "            ),\n",
+       "            Node(id='site-2', neighbors=[Neighbor(id='site-1', weight=0.3333333432674408)]),\n",
+       "            Node(\n",
+       "                id='site-3',\n",
+       "                neighbors=[\n",
+       "                    Neighbor(id='site-4', weight=0.3333333432674408),\n",
+       "                    Neighbor(id='site-5', weight=0.3333333432674408)\n",
+       "                ]\n",
+       "            ),\n",
+       "            Node(\n",
+       "                id='site-4',\n",
+       "                neighbors=[\n",
+       "                    Neighbor(id='site-1', weight=0.3333333432674408),\n",
+       "                    Neighbor(id='site-3', weight=0.3333333432674408)\n",
+       "                ]\n",
+       "            ),\n",
+       "            Node(id='site-5', neighbors=[Neighbor(id='site-3', weight=0.3333333432674408)])\n",
+       "        ]\n",
+       "    ),\n",
+       "    extra={'iterations': 50}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mConfig\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mnetwork\u001b[0m=\u001b[1;35mNetwork\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mnodes\u001b[0m=\u001b[1m[\u001b[0m\n", + " \u001b[1;35mNode\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mid\u001b[0m=\u001b[32m'site-1'\u001b[0m,\n", + " \u001b[33mneighbors\u001b[0m=\u001b[1m[\u001b[0m\n", + " \u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-2'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-4'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNode\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-2'\u001b[0m, \u001b[33mneighbors\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-1'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNode\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mid\u001b[0m=\u001b[32m'site-3'\u001b[0m,\n", + " \u001b[33mneighbors\u001b[0m=\u001b[1m[\u001b[0m\n", + " \u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-4'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-5'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNode\u001b[0m\u001b[1m(\u001b[0m\n", + " 
\u001b[33mid\u001b[0m=\u001b[32m'site-4'\u001b[0m,\n", + " \u001b[33mneighbors\u001b[0m=\u001b[1m[\u001b[0m\n", + " \u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-1'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-3'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1;35mNode\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-5'\u001b[0m, \u001b[33mneighbors\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mNeighbor\u001b[0m\u001b[1m(\u001b[0m\u001b[33mid\u001b[0m=\u001b[32m'site-3'\u001b[0m, \u001b[33mweight\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.3333333432674408\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[33mextra\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'iterations'\u001b[0m: \u001b[1;36m50\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from rich import print\n", + "\n", + "print(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiating and running the distributed algorithm\n", + "\n", + "Now that we have all the configurations, we can instantiate the clients and a controller." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from nvflare.job_config.api import FedJob\n", + "from nvflare.app_opt.p2p.executors import ConsensusExecutor\n", + "from nvflare.app_opt.p2p.controllers import DistOptController\n", + "\n", + "# Create job\n", + "job = FedJob(name=\"consensus\")\n", + "\n", + "# send controller to server\n", + "controller = DistOptController(config=config)\n", + "job.to_server(controller)\n", + "\n", + "# Add clients\n", + "for i in range(num_clients):\n", + " executor = ConsensusExecutor() # the executor will be initialized with a random value\n", + " job.to(executor, f\"site-{i+1}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can run the algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38m2025-02-03 10:22:52,438 - SimulatorRunner - INFO - Create the Simulator Server.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:52,439 - CoreCell - INFO - server: creating listener on tcp://0:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:52,633 - CoreCell - INFO - server: created backbone external listener for tcp://0:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:52,633 - ConnectorManager - INFO - 15299: Try start_listener Listener resources: {'secure': False, 'host': 'localhost'}\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:52,634 - conn_manager - INFO - Connector [CH00002 PASSIVE tcp://0:16697] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,139 - CoreCell - INFO - server: created backbone internal listener for tcp://localhost:16697\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,139 - conn_manager - INFO - Connector [CH00001 PASSIVE tcp://0:55189] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,140 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + 
"\u001b[38m2025-02-03 10:22:53,140 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,141 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,141 - SimulatorServer - INFO - max_reg_duration=60.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,188 - hci - INFO - Starting Admin Server localhost on Port 55190\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,188 - SimulatorRunner - INFO - Deploy the Apps.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,230 - SimulatorRunner - INFO - Create the simulate clients.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,231 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,231 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,237 - Communicator - INFO - Trying to register with server ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,238 - ClientManager - INFO - authenticated client site-1: client_fqcn='site-1'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,238 - ClientManager - INFO - Client: New client site-1@192.168.0.251 joined. Sent token: 4118e4ff-7abe-4afa-9d1c-f60a8ce067f3. Total clients: 1\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,238 - Communicator - INFO - register RC: ok\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,238 - FederatedClient - INFO - Successfully registered client:site-1 for project simulator_server. 
Token:4118e4ff-7abe-4afa-9d1c-f60a8ce067f3 SSID:\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,239 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,239 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,239 - Communicator - INFO - Trying to register with server ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,240 - ClientManager - INFO - authenticated client site-2: client_fqcn='site-2'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,240 - ClientManager - INFO - Client: New client site-2@192.168.0.251 joined. Sent token: 94cc054d-13c8-4ab1-b1c4-61fa33f6753d. Total clients: 2\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,240 - Communicator - INFO - register RC: ok\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,240 - FederatedClient - INFO - Successfully registered client:site-2 for project simulator_server. Token:94cc054d-13c8-4ab1-b1c4-61fa33f6753d SSID:\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,241 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,241 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,241 - Communicator - INFO - Trying to register with server ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,242 - ClientManager - INFO - authenticated client site-3: client_fqcn='site-3'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,242 - ClientManager - INFO - Client: New client site-3@192.168.0.251 joined. Sent token: c4764ab1-0ca6-478a-a3a0-0d24c4ea3c4e. Total clients: 3\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,242 - Communicator - INFO - register RC: ok\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,242 - FederatedClient - INFO - Successfully registered client:site-3 for project simulator_server. 
Token:c4764ab1-0ca6-478a-a3a0-0d24c4ea3c4e SSID:\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,242 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,242 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - Communicator - INFO - Trying to register with server ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - ClientManager - INFO - authenticated client site-4: client_fqcn='site-4'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - ClientManager - INFO - Client: New client site-4@192.168.0.251 joined. Sent token: a3ff8cfc-05f9-4fbd-9afa-590652798c7e. Total clients: 4\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - Communicator - INFO - register RC: ok\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - FederatedClient - INFO - Successfully registered client:site-4 for project simulator_server. Token:a3ff8cfc-05f9-4fbd-9afa-590652798c7e SSID:\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,243 - Communicator - INFO - Trying to register with server ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - ClientManager - INFO - authenticated client site-5: client_fqcn='site-5'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - ClientManager - INFO - Client: New client site-5@192.168.0.251 joined. Sent token: ea899d30-af22-4660-bab5-c8b6e18d62fe. Total clients: 5\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - Communicator - INFO - register RC: ok\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - FederatedClient - INFO - Successfully registered client:site-5 for project simulator_server. 
Token:ea899d30-af22-4660-bab5-c8b6e18d62fe SSID:\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - SimulatorRunner - INFO - Set the client status ready.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - SimulatorRunner - INFO - Deploy and start the Server App.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - Cell - INFO - Register blob CB for channel='server_command', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,244 - ServerCommandAgent - INFO - ServerCommandAgent cell register_request_cb: server.simulate_job\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,258 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,258 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - AuxRunner - INFO - registered aux handler for topic __sync_runner__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - AuxRunner - INFO - registered aux handler for topic __job_heartbeat__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - AuxRunner - INFO - registered aux handler for topic __task_check__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REQUEST\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REPLY\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - ReliableMessage - INFO - enabled reliable message: max_request_workers=20 query_interval=2.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job] - Server runner starting ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,259 - AuxRunner - INFO - registered aux handler for topic fed.event\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,260 - ServerRunner - INFO - 
[identity=simulator_server, run=simulate_job] - starting workflow controller () ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,260 - DistOptController - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - P2PAlgorithmController started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,260 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - Workflow controller () started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:53,260 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - scheduled task config\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:54,247 - SimulatorClientRunner - INFO - Start the clients run simulation.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:55,250 - SimulatorClientRunner - INFO - Simulate Run client: site-1 on GPU group: None\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:55,250 - SimulatorClientRunner - INFO - Simulate Run client: site-2 on GPU group: None\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:55,250 - SimulatorClientRunner - INFO - Simulate Run client: site-3 on GPU group: None\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:55,255 - SimulatorClientRunner - INFO - Simulate Run client: site-4 on GPU group: None\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:55,261 - SimulatorClientRunner - INFO - Simulate Run client: site-5 on GPU group: None\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,278 - ClientTaskWorker - INFO - ClientTaskWorker started to run\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,279 - ClientTaskWorker - INFO - ClientTaskWorker started to run\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,279 - ClientTaskWorker - INFO - ClientTaskWorker started to run\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,280 - ClientTaskWorker - INFO - ClientTaskWorker started to run\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,283 - ClientTaskWorker - INFO - ClientTaskWorker started to run\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,365 - CoreCell - INFO - site-4.simulate_job: created 
backbone external connector to tcp://localhost:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,365 - CoreCell - INFO - site-3.simulate_job: created backbone external connector to tcp://localhost:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - CoreCell - INFO - site-5.simulate_job: created backbone external connector to tcp://localhost:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - CoreCell - INFO - site-2.simulate_job: created backbone external connector to tcp://localhost:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - conn_manager - INFO - Connector [CH00001 ACTIVE tcp://localhost:55189] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - conn_manager - INFO - Connector [CH00001 ACTIVE tcp://localhost:55189] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - conn_manager - INFO - Connector [CH00001 ACTIVE tcp://localhost:55189] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - CoreCell - INFO - site-1.simulate_job: created backbone external connector to tcp://localhost:55189\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - conn_manager - INFO - Connector [CH00001 ACTIVE tcp://localhost:55189] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,366 - conn_manager - INFO - Connector [CH00001 ACTIVE tcp://localhost:55189] is starting\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,367 - conn_manager - INFO - Connection [CN00002 127.0.0.1:55217 => 127.0.0.1:55189] is created: PID: 15317\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,367 - conn_manager - INFO - Connection [CN00002 127.0.0.1:55214 => 127.0.0.1:55189] is created: PID: 15316\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,367 - conn_manager - INFO - Connection [CN00002 127.0.0.1:55218 => 127.0.0.1:55189] is created: PID: 15319\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,367 - conn_manager - INFO - Connection [CN00002 127.0.0.1:55215 => 127.0.0.1:55189] is created: PID: 15318\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,367 - 
conn_manager - INFO - Connection [CN00002 127.0.0.1:55216 => 127.0.0.1:55189] is created: PID: 15320\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,368 - conn_manager - INFO - Connection [CN00008 127.0.0.1:55189 <= 127.0.0.1:55217] is created: PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,369 - conn_manager - INFO - Connection [CN00009 127.0.0.1:55189 <= 127.0.0.1:55214] is created: PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,369 - conn_manager - INFO - Connection [CN00010 127.0.0.1:55189 <= 127.0.0.1:55218] is created: PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,369 - conn_manager - INFO - Connection [CN00011 127.0.0.1:55189 <= 127.0.0.1:55215] is created: PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,370 - conn_manager - INFO - Connection [CN00012 127.0.0.1:55189 <= 127.0.0.1:55216] is created: PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,468 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,468 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,469 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,469 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,470 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,471 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,471 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,471 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,471 - AuxRunner - INFO - registered aux handler for topic 
ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:56,471 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Request\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,322 - AuxRunner - INFO - registered aux handler for topic ObjectStreamer.Abort\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __end_run__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __end_run__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __do_task__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __do_task__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic 
__end_run__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __do_task__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __end_run__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __do_task__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __end_run__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - AuxRunner - INFO - registered aux handler for topic __do_task__\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,323 - Cell - INFO - Register blob CB for channel='aux_communication', topic='*'\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,831 - ClientRunner - INFO - [identity=site-5, run=simulate_job] - synced to Server Runner in 0.506201982498169 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REQUEST\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REPLY\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ClientRunner - INFO - [identity=site-4, run=simulate_job] - synced to Server Runner in 0.5069057941436768 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ReliableMessage - INFO - enabled reliable message: max_request_workers=20 query_interval=2.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - 
AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REQUEST\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REPLY\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic send_value\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic fed.event\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ReliableMessage - INFO - enabled reliable message: max_request_workers=20 query_interval=2.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ClientRunner - INFO - [identity=site-5, run=simulate_job] - client runner started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ClientRunner - INFO - [identity=site-3, run=simulate_job] - synced to Server Runner in 0.5071201324462891 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ClientTaskWorker - INFO - Initialize ClientRunner for client: site-5\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic send_value\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REQUEST\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REPLY\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - AuxRunner - INFO - registered aux handler for topic fed.event\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,832 - ClientRunner - INFO - [identity=site-4, run=simulate_job] - client runner started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ReliableMessage - INFO - enabled reliable message: max_request_workers=20 query_interval=2.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ClientTaskWorker - INFO - Initialize ClientRunner for client: site-4\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - AuxRunner - INFO - registered aux handler for topic send_value\u001b[0m\n", + 
"\u001b[38m2025-02-03 10:22:57,833 - AuxRunner - INFO - registered aux handler for topic fed.event\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ClientRunner - INFO - [identity=site-3, run=simulate_job] - client runner started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ClientTaskWorker - INFO - Initialize ClientRunner for client: site-3\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ClientRunner - INFO - [identity=site-2, run=simulate_job] - synced to Server Runner in 0.5078608989715576 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REQUEST\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ClientRunner - INFO - [identity=site-1, run=simulate_job] - synced to Server Runner in 0.5078649520874023 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REPLY\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REQUEST\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,833 - ReliableMessage - INFO - enabled reliable message: max_request_workers=20 query_interval=2.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - AuxRunner - INFO - registered aux handler for topic RM.RELIABLE_REPLY\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - ReliableMessage - INFO - enabled reliable message: max_request_workers=20 query_interval=2.0\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - AuxRunner - INFO - registered aux handler for topic send_value\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - AuxRunner - INFO - registered aux handler for topic fed.event\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - ClientRunner - INFO - [identity=site-2, run=simulate_job] - client runner started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - AuxRunner - INFO - registered aux handler for topic send_value\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - 
ClientTaskWorker - INFO - Initialize ClientRunner for client: site-2\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - AuxRunner - INFO - registered aux handler for topic fed.event\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - ClientRunner - INFO - [identity=site-1, run=simulate_job] - client runner started\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,834 - ClientTaskWorker - INFO - Initialize ClientRunner for client: site-1\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,836 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - assigned task to client site-1: name=config, id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,837 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - sent task assignment to client. client_name:site-1 task_id:58b0e0a5-eae9-4c65-b3ce-38d620cfc39c\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,837 - GetTaskCommand - INFO - return task to client. client_name: site-1 task_name: config task_id: 58b0e0a5-eae9-4c65-b3ce-38d620cfc39c sharable_header_task_id: 58b0e0a5-eae9-4c65-b3ce-38d620cfc39c\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,838 - Communicator - INFO - Received from simulator_server server. getTask: config size: 728B (728 Bytes) time: 0.003995 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,838 - FederatedClient - INFO - pull_task completed. 
Task name:config Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,838 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=config, id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,839 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,839 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,839 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,839 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,841 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,841 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,842 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job] - got result from client site-1 for task: name=config, id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,842 - ServerRunner - INFO - 
[identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job, peer_rc=OK, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,842 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-1 task_id:58b0e0a5-eae9-4c65-b3ce-38d620cfc39c\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,842 - Communicator - INFO - SubmitUpdate size: 606B (606 Bytes). time: 0.001474 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,842 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=58b0e0a5-eae9-4c65-b3ce-38d620cfc39c] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,843 - ClientTaskWorker - INFO - Finished one task run for client: site-1 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,942 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - task config exit with status TaskCompletionStatus.OK\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:57,944 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - scheduled task config\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,348 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - assigned task to client site-2: name=config, id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,348 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - sent task assignment to client. 
client_name:site-2 task_id:3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,349 - GetTaskCommand - INFO - return task to client. client_name: site-2 task_name: config task_id: 3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e sharable_header_task_id: 3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,351 - Communicator - INFO - Received from simulator_server server. getTask: config size: 701B (701 Bytes) time: 0.009205 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,352 - FederatedClient - INFO - pull_task completed. Task name:config Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,352 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=config, id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,352 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,353 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,353 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,353 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,358 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, 
peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,358 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,359 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job] - got result from client site-2 for task: name=config, id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,360 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job, peer_rc=OK, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,360 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-2 task_id:3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,361 - Communicator - INFO - SubmitUpdate size: 606B (606 Bytes). 
time: 0.002935 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,361 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=3b6a97b5-7ef4-4b0e-bff4-f305bfa6e69e] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,361 - ClientTaskWorker - INFO - Finished one task run for client: site-2 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,552 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - task config exit with status TaskCompletionStatus.OK\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,553 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - scheduled task config\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,852 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - assigned task to client site-3: name=config, id=60862047-5450-4653-a6a9-5ba74fa83953\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,854 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - sent task assignment to client. client_name:site-3 task_id:60862047-5450-4653-a6a9-5ba74fa83953\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,855 - GetTaskCommand - INFO - return task to client. client_name: site-3 task_name: config task_id: 60862047-5450-4653-a6a9-5ba74fa83953 sharable_header_task_id: 60862047-5450-4653-a6a9-5ba74fa83953\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,858 - Communicator - INFO - Received from simulator_server server. getTask: config size: 728B (728 Bytes) time: 0.009255 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,859 - FederatedClient - INFO - pull_task completed. 
Task name:config Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,859 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=config, id=60862047-5450-4653-a6a9-5ba74fa83953\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,859 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,860 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,860 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,860 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,866 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,866 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,868 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job] - got result from client site-3 for task: name=config, id=60862047-5450-4653-a6a9-5ba74fa83953\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,868 - ServerRunner - INFO - 
[identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job, peer_rc=OK, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,868 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-3 task_id:60862047-5450-4653-a6a9-5ba74fa83953\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,869 - Communicator - INFO - SubmitUpdate size: 606B (606 Bytes). time: 0.003179 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,870 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=60862047-5450-4653-a6a9-5ba74fa83953] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,870 - ClientTaskWorker - INFO - Finished one task run for client: site-3 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,959 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - task config exit with status TaskCompletionStatus.OK\u001b[0m\n", + "\u001b[38m2025-02-03 10:22:58,962 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - scheduled task config\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,864 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - assigned task to client site-4: name=config, id=06c011e8-6aee-4711-8593-c7812434da85\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,864 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - sent task assignment to client. 
client_name:site-4 task_id:06c011e8-6aee-4711-8593-c7812434da85\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,865 - GetTaskCommand - INFO - return task to client. client_name: site-4 task_name: config task_id: 06c011e8-6aee-4711-8593-c7812434da85 sharable_header_task_id: 06c011e8-6aee-4711-8593-c7812434da85\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,866 - Communicator - INFO - Received from simulator_server server. getTask: config size: 728B (728 Bytes) time: 0.003766 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,866 - FederatedClient - INFO - pull_task completed. Task name:config Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,866 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=config, id=06c011e8-6aee-4711-8593-c7812434da85\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,867 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,867 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,867 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,867 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,870 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, 
peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,870 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,871 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job] - got result from client site-4 for task: name=config, id=06c011e8-6aee-4711-8593-c7812434da85\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,871 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job, peer_rc=OK, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,871 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-4 task_id:06c011e8-6aee-4711-8593-c7812434da85\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,872 - Communicator - INFO - SubmitUpdate size: 606B (606 Bytes). 
time: 0.002201 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,872 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=06c011e8-6aee-4711-8593-c7812434da85] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,873 - ClientTaskWorker - INFO - Finished one task run for client: site-4 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:00,995 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - task config exit with status TaskCompletionStatus.OK\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,003 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - scheduled task config\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,372 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - assigned task to client site-5: name=config, id=346019d9-5f73-4e6e-807d-b27d2b99e45d\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,373 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - sent task assignment to client. client_name:site-5 task_id:346019d9-5f73-4e6e-807d-b27d2b99e45d\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,373 - GetTaskCommand - INFO - return task to client. client_name: site-5 task_name: config task_id: 346019d9-5f73-4e6e-807d-b27d2b99e45d sharable_header_task_id: 346019d9-5f73-4e6e-807d-b27d2b99e45d\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,378 - Communicator - INFO - Received from simulator_server server. getTask: config size: 701B (701 Bytes) time: 0.008450 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,378 - FederatedClient - INFO - pull_task completed. 
Task name:config Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,379 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=config, id=346019d9-5f73-4e6e-807d-b27d2b99e45d\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,380 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,380 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,381 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,381 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,386 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,386 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,388 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job] - got result from client site-5 for task: name=config, id=346019d9-5f73-4e6e-807d-b27d2b99e45d\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,389 - ServerRunner - INFO - 
[identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job, peer_rc=OK, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,389 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-5 task_id:346019d9-5f73-4e6e-807d-b27d2b99e45d\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,390 - Communicator - INFO - SubmitUpdate size: 606B (606 Bytes). time: 0.004067 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,390 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=config, task_id=346019d9-5f73-4e6e-807d-b27d2b99e45d] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,391 - ClientTaskWorker - INFO - Finished one task run for client: site-5 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,403 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - task config exit with status TaskCompletionStatus.OK\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,406 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - scheduled task run_algorithm\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,884 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - assigned task to client site-3: name=run_algorithm, id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,884 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - sent task assignment to client. 
client_name:site-3 task_id:37ae4da3-16fd-4e8c-b7d6-c95eaef20469\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,885 - GetTaskCommand - INFO - return task to client. client_name: site-3 task_name: run_algorithm task_id: 37ae4da3-16fd-4e8c-b7d6-c95eaef20469 sharable_header_task_id: 37ae4da3-16fd-4e8c-b7d6-c95eaef20469\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,888 - Communicator - INFO - Received from simulator_server server. getTask: run_algorithm size: 689B (689 Bytes) time: 0.006691 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,888 - FederatedClient - INFO - pull_task completed. Task name:run_algorithm Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,889 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=run_algorithm, id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:01,889 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,873 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - assigned task to client site-1: name=run_algorithm, id=117b3e94-ce69-49af-9188-61f141252bc4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,874 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - sent task assignment to client. client_name:site-1 task_id:117b3e94-ce69-49af-9188-61f141252bc4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,875 - GetTaskCommand - INFO - return task to client. 
client_name: site-1 task_name: run_algorithm task_id: 117b3e94-ce69-49af-9188-61f141252bc4 sharable_header_task_id: 117b3e94-ce69-49af-9188-61f141252bc4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,876 - Communicator - INFO - Received from simulator_server server. getTask: run_algorithm size: 689B (689 Bytes) time: 0.006367 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,877 - FederatedClient - INFO - pull_task completed. Task name:run_algorithm Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,877 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - assigned task to client site-4: name=run_algorithm, id=035682b1-7197-4165-8faf-dbbb1c151a97\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,877 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=run_algorithm, id=117b3e94-ce69-49af-9188-61f141252bc4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,878 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - sent task assignment to client. client_name:site-4 task_id:035682b1-7197-4165-8faf-dbbb1c151a97\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,879 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,879 - GetTaskCommand - INFO - return task to client. 
client_name: site-4 task_name: run_algorithm task_id: 035682b1-7197-4165-8faf-dbbb1c151a97 sharable_header_task_id: 035682b1-7197-4165-8faf-dbbb1c151a97\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,881 - Communicator - INFO - Received from simulator_server server. getTask: run_algorithm size: 689B (689 Bytes) time: 0.005837 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,882 - FederatedClient - INFO - pull_task completed. Task name:run_algorithm Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,882 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=run_algorithm, id=035682b1-7197-4165-8faf-dbbb1c151a97\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:02,884 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,394 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - assigned task to client site-2: name=run_algorithm, id=0117fb75-76e2-4e60-8f23-3adee42b0a38\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,395 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - sent task assignment to client. client_name:site-2 task_id:0117fb75-76e2-4e60-8f23-3adee42b0a38\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,395 - GetTaskCommand - INFO - return task to client. 
client_name: site-2 task_name: run_algorithm task_id: 0117fb75-76e2-4e60-8f23-3adee42b0a38 sharable_header_task_id: 0117fb75-76e2-4e60-8f23-3adee42b0a38\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,398 - Communicator - INFO - Received from simulator_server server. getTask: run_algorithm size: 689B (689 Bytes) time: 0.007840 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,398 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - assigned task to client site-5: name=run_algorithm, id=2a786117-6736-4998-97cc-27b5afe05547\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,399 - FederatedClient - INFO - pull_task completed. Task name:run_algorithm Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,399 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - sent task assignment to client. client_name:site-5 task_id:2a786117-6736-4998-97cc-27b5afe05547\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,399 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=run_algorithm, id=0117fb75-76e2-4e60-8f23-3adee42b0a38\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,399 - GetTaskCommand - INFO - return task to client. 
client_name: site-5 task_name: run_algorithm task_id: 2a786117-6736-4998-97cc-27b5afe05547 sharable_header_task_id: 2a786117-6736-4998-97cc-27b5afe05547\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,399 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,401 - Communicator - INFO - Received from simulator_server server. getTask: run_algorithm size: 689B (689 Bytes) time: 0.005055 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,401 - FederatedClient - INFO - pull_task completed. Task name:run_algorithm Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,402 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - got task assignment: name=run_algorithm, id=2a786117-6736-4998-97cc-27b5afe05547\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,402 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - invoking task executor ConsensusExecutor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,562 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - finished 
processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,562 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,562 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - finished processing task\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, 
task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - try #1: sending task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,563 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - checking task ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,565 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,565 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,567 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,567 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,567 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, 
task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,567 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,567 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job] - got result from client site-1 for task: name=run_algorithm, id=117b3e94-ce69-49af-9188-61f141252bc4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job, peer_rc=OK, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - start to send task result to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - FederatedClient - INFO - Starting to push execute result.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - SubmitUpdateCommand - INFO - submit_update process. 
client_name:site-1 task_id:117b3e94-ce69-49af-9188-61f141252bc4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,568 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job] - got result from client site-3 for task: name=run_algorithm, id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,569 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job] - got result from client site-2 for task: name=run_algorithm, id=0117fb75-76e2-4e60-8f23-3adee42b0a38\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,569 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job, peer_rc=OK, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,569 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-3 task_id:37ae4da3-16fd-4e8c-b7d6-c95eaef20469\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,570 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job, peer_rc=OK, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,570 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-2 task_id:0117fb75-76e2-4e60-8f23-3adee42b0a38\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,570 - Communicator - INFO - SubmitUpdate size: 613B (613 Bytes). 
time: 0.004786 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,570 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=117b3e94-ce69-49af-9188-61f141252bc4] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,570 - ClientTaskWorker - INFO - Finished one task run for client: site-1 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job] - got result from client site-5 for task: name=run_algorithm, id=2a786117-6736-4998-97cc-27b5afe05547\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job, peer_rc=OK, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - Communicator - INFO - SubmitUpdate size: 613B (613 Bytes). time: 0.003910 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - SubmitUpdateCommand - INFO - submit_update process. 
client_name:site-5 task_id:2a786117-6736-4998-97cc-27b5afe05547\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=37ae4da3-16fd-4e8c-b7d6-c95eaef20469] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - ClientTaskWorker - INFO - Finished one task run for client: site-3 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,571 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job] - got result from client site-4 for task: name=run_algorithm, id=035682b1-7197-4165-8faf-dbbb1c151a97\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - Communicator - INFO - SubmitUpdate size: 613B (613 Bytes). time: 0.004425 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job, peer_rc=OK, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - finished processing client result by controller\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - SubmitUpdateCommand - INFO - submit_update process. client_name:site-4 task_id:035682b1-7197-4165-8faf-dbbb1c151a97\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - Communicator - INFO - SubmitUpdate size: 613B (613 Bytes). 
time: 0.003643 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=0117fb75-76e2-4e60-8f23-3adee42b0a38] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=2a786117-6736-4998-97cc-27b5afe05547] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ClientTaskWorker - INFO - Finished one task run for client: site-2 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ClientTaskWorker - INFO - Finished one task run for client: site-5 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - Communicator - INFO - SubmitUpdate size: 613B (613 Bytes). time: 0.004091 seconds\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job, task_name=run_algorithm, task_id=035682b1-7197-4165-8faf-dbbb1c151a97] - task result sent to server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,572 - ClientTaskWorker - INFO - Finished one task run for client: site-4 interval: 2 task_processed: True\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,640 - WFCommServer - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - task run_algorithm exit with status TaskCompletionStatus.OK\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,840 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - Workflow: controller finalizing ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,841 - DistOptController - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - P2PAlgorithmController stopped\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,842 - ServerRunner - INFO - 
[identity=simulator_server, run=simulate_job, wf=controller] - ABOUT_TO_END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,845 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - received request from Server to end current RUN\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,845 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - received request from Server to end current RUN\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,846 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - Firing CHECK_END_RUN_READINESS ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,846 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - received request from Server to end current RUN\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,847 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,847 - ReliableMessage - INFO - ReliableMessage is shutdown\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,847 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - received request from Server to end current RUN\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,847 - ObjectStreamer - INFO - Stream Runer is Shut Down\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,847 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - received request from Server to end current RUN\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:03,848 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - Server runner finished.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,281 - SimulatorServer - INFO - Server app stopped.\n", + "\n", + "\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,288 - ReliableMessage - INFO - 
shutdown reliable message monitor\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,576 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-3, peer_run=simulate_job] - server runner is finalizing - asked client to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,576 - GetTaskCommand - INFO - return task to client. client_name: site-3 task_name: __end_run__ task_id: sharable_header_task_id: \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,578 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-1, peer_run=simulate_job] - server runner is finalizing - asked client to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,578 - FederatedClient - INFO - pull_task completed. Task name:__end_run__ Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,578 - ClientRunner - INFO - [identity=site-3, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - server asked to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,578 - GetTaskCommand - INFO - return task to client. 
client_name: site-1 task_name: __end_run__ task_id: sharable_header_task_id: \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,579 - ClientRunner - INFO - [identity=site-3, run=simulate_job] - started end-run events sequence\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,579 - ClientRunner - INFO - [identity=site-3, run=simulate_job] - ABOUT_TO_END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,579 - ClientRunner - INFO - [identity=site-3, run=simulate_job] - Firing CHECK_END_RUN_READINESS ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,579 - ClientRunner - INFO - [identity=site-3, run=simulate_job] - END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,579 - ClientTaskWorker - INFO - End the Simulator run.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,580 - ClientTaskWorker - INFO - Clean up ClientRunner for : site-3 \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - FederatedClient - INFO - pull_task completed. Task name:__end_run__ Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - ClientRunner - INFO - [identity=site-1, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - server asked to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - ClientRunner - INFO - [identity=site-1, run=simulate_job] - started end-run events sequence\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - conn_manager - INFO - Connection [CN00002 Not Connected] is closed PID: 15316\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-2, peer_run=simulate_job] - server runner is finalizing - asked client to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - ClientRunner - INFO - [identity=site-1, run=simulate_job] - ABOUT_TO_END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - GetTaskCommand - INFO - return task to client. 
client_name: site-2 task_name: __end_run__ task_id: sharable_header_task_id: \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - ClientRunner - INFO - [identity=site-1, run=simulate_job] - Firing CHECK_END_RUN_READINESS ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,581 - conn_manager - INFO - Connection [CN00009 Not Connected] is closed PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,582 - ClientRunner - INFO - [identity=site-1, run=simulate_job] - END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,582 - ClientTaskWorker - INFO - End the Simulator run.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,582 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-4, peer_run=simulate_job] - server runner is finalizing - asked client to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,583 - ClientTaskWorker - INFO - Clean up ClientRunner for : site-1 \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,583 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller, peer=site-5, peer_run=simulate_job] - server runner is finalizing - asked client to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,583 - GetTaskCommand - INFO - return task to client. client_name: site-4 task_name: __end_run__ task_id: sharable_header_task_id: \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,583 - FederatedClient - INFO - pull_task completed. Task name:__end_run__ Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,583 - GetTaskCommand - INFO - return task to client. 
client_name: site-5 task_name: __end_run__ task_id: sharable_header_task_id: \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - ClientRunner - INFO - [identity=site-2, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - server asked to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - ClientRunner - INFO - [identity=site-2, run=simulate_job] - started end-run events sequence\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - ClientRunner - INFO - [identity=site-2, run=simulate_job] - ABOUT_TO_END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - conn_manager - INFO - Connection [CN00002 Not Connected] is closed PID: 15318\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - ClientRunner - INFO - [identity=site-2, run=simulate_job] - Firing CHECK_END_RUN_READINESS ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - ClientRunner - INFO - [identity=site-2, run=simulate_job] - END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - conn_manager - INFO - Connection [CN00011 Not Connected] is closed PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,584 - ClientTaskWorker - INFO - End the Simulator run.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - FederatedClient - INFO - pull_task completed. Task name:__end_run__ Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - ClientTaskWorker - INFO - Clean up ClientRunner for : site-2 \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - ClientRunner - INFO - [identity=site-4, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - server asked to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - ClientRunner - INFO - [identity=site-4, run=simulate_job] - started end-run events sequence\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - ClientRunner - INFO - [identity=site-4, run=simulate_job] - ABOUT_TO_END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - FederatedClient - INFO - pull_task completed. 
Task name:__end_run__ Status:True \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,585 - ClientRunner - INFO - [identity=site-4, run=simulate_job] - Firing CHECK_END_RUN_READINESS ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientRunner - INFO - [identity=site-5, run=simulate_job, peer=simulator_server, peer_run=simulate_job] - server asked to end the run\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientRunner - INFO - [identity=site-4, run=simulate_job] - END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientRunner - INFO - [identity=site-5, run=simulate_job] - started end-run events sequence\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientTaskWorker - INFO - End the Simulator run.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientRunner - INFO - [identity=site-5, run=simulate_job] - ABOUT_TO_END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - conn_manager - INFO - Connection [CN00002 Not Connected] is closed PID: 15317\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - conn_manager - INFO - Connection [CN00008 Not Connected] is closed PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientRunner - INFO - [identity=site-5, run=simulate_job] - Firing CHECK_END_RUN_READINESS ...\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientRunner - INFO - [identity=site-5, run=simulate_job] - END_RUN fired\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientTaskWorker - INFO - End the Simulator run.\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,586 - ClientTaskWorker - INFO - Clean up ClientRunner for : site-4 \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,587 - ClientTaskWorker - INFO - Clean up ClientRunner for : site-5 \u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,587 - FederatedClient - INFO - Shutting down client run: site-1\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,587 - conn_manager - INFO - Connection [CN00002 Not Connected] is closed PID: 15320\u001b[0m\n", + 
"\u001b[38m2025-02-03 10:23:05,588 - conn_manager - INFO - Connection [CN00012 Not Connected] is closed PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,588 - FederatedClient - INFO - Shutting down client run: site-2\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,588 - conn_manager - INFO - Connection [CN00002 Not Connected] is closed PID: 15319\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,588 - conn_manager - INFO - Connection [CN00010 Not Connected] is closed PID: 15299\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,588 - FederatedClient - INFO - Shutting down client run: site-3\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,588 - FederatedClient - INFO - Shutting down client run: site-4\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,589 - FederatedClient - INFO - Shutting down client run: site-5\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,589 - ServerRunner - INFO - [identity=simulator_server, run=simulate_job, wf=controller] - asked to abort - triggered abort_signal to stop the RUN\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,714 - hci - INFO - Admin Server localhost on Port 55190 shutdown!\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,715 - SimulatorServer - INFO - shutting down server\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,715 - SimulatorServer - INFO - canceling sync locks\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:05,715 - SimulatorServer - INFO - server off\u001b[0m\n", + "\u001b[38m2025-02-03 10:23:09,171 - MPM - INFO - MPM: Good Bye!\u001b[0m\n" + ] + } + ], + "source": [ + "# run\n", + "job.export_job(\"./tmp/job_configs\")\n", + "job.simulator_run(\"./tmp/runs/consensus\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, since in this toy example we have access to both the intial values and the adjacency matrix, we can compute the consensus value at which the clients will converge." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/82/970ljrw529v0chx2svm8gwwr0000gp/T/ipykernel_15268/4202699194.py:6: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " f\"site-{i+1}\": torch.load(f\"tmp/runs/consensus/site-{i+1}/value_sequence.pt\") for i in range(num_clients)\n", + "/Users/ffarina/Documents/code/NVFlare/nvflare/app_opt/p2p/utils/topology.py:37: UserWarning: Casting complex values to real discards the imaginary part (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/Copy.cpp:308.)\n", + " idx = eigenvalues.float().argsort(descending=True)\n" + ] + }, + { + "data": { + "text/html": [ + "
Consensus value: 0.2560408115386963\n",
+       "
\n" + ], + "text/plain": [ + "Consensus value: \u001b[1;36m0.2560408115386963\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import torch\n", + "\n", + "from nvflare.app_opt.p2p.utils.topology import get_matrix_steady_state\n", + "\n", + "history = {\n", + " f\"site-{i+1}\": torch.load(f\"tmp/runs/consensus/site-{i+1}/value_sequence.pt\") for i in range(num_clients)\n", + "}\n", + "x0=torch.tensor([h[0] for name, h in history.items()]).t()\n", + "consensus_value = x0 @ get_matrix_steady_state(adjacency_matrix)\n", + "print(f\"Consensus value: {consensus_value}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By inspecting the history/results, we can easily see that all the clients successfully converged to the expected consensus value. " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGzCAYAAAD9pBdvAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAYmZJREFUeJzt3Qd8k9X6B/BfZndLSwsUKHuJKFMQEVFAURRlCaLXdS/+HXivinodeMFxLwiK4wJuUa/jigNcV5ZMRWQKsgTZu6V00pU2ef+f56QJSdq0aWmStvl9/bwmeXOSnLwNeZ+c85xzdJqmaSAiIiIKEn2wXpiIiIhIMBghIiKioGIwQkREREHFYISIiIiCisEIERERBRWDESIiIgoqBiNEREQUVAxGiIiIKKgYjBAREVFQMRghCjCdToenn366Rp/z/fffV8978OBB1GYvvPAC2rRpA4PBgG7dunktd8cdd6BVq1YBrVttqsfll1+uNqJQwWCEQpLj5O1t++WXX1AbTZ06FV999RXqoiVLluDvf/87+vXrh/fee0+9FyIiYeRhoFD27LPPonXr1mX2t2vXDrWRnMBHjx6N4cOHu+2/9dZbcdNNNyEsLAy11fLly6HX6/Huu+/CbDYHuzpEVIswGKGQds0116BXr16o66TbQ7baLC0tDREREQxEiKgMdtMQeVFcXIyEhATceeedZe7LyclBeHg4HnnkEbeT7V/+8hc0btxY3de1a1d88MEH1c5LkLwS6TJykOt5eXnqOR3dSfLYinJGXnvtNZx//vmqxaRp06aYMGECsrKy3MpIbkKXLl2wc+dOXHHFFYiMjESzZs0wY8YMn45TSUkJnnvuObRt21a9jryXJ598EkVFRW51l64Zqb+j7lLnqpDHPvzww0hJSVGv07FjR7z44osob+Hxjz76CL1791bvJT4+HpdddpnqJnL4+uuvce2116pjIs8ldZf3YLVaUVXXXXedyoMpT9++fd2CXTkGAwcORKNGjdTrdu7cGa+//nqlr+Ht77ty5Uq1Xy5drVu3Dld
ffTXi4uLUMRgwYADWrFnjViY3NxcPPvig+ntJXaROV155JTZv3lzFI0B07hiMUEjLzs5Genq623b69Gl1n8lkwogRI1SOhsVicXuc7JOTrXSNiIKCAnVS//DDD3HLLbeoRE05EUiw8Oqrr9ZIXeW55aTRv39/dV22u+++22t5CWYk+JAT7syZMzFq1Ci8+eabuOqqq1Sg5SozM1OdvCSAkrKdOnXCY489hoULF1Zar/Hjx2Py5Mno0aMHXn75ZXXimzZtmvPYOOou9Zb6O+ouAYKvJOC4/vrr1fNLPV966SUVjDz66KOYOHGiW9lnnnlGdVvJ30+64eS2BDDSTeR6co+OjlaPlb9Pz5491Xt4/PHHUVVjx47FgQMHsGHDBrf9hw4dUrlHrsdBAo+WLVuqYE2Os9Trvvvuw5w5c1BT5H3KsZWAecqUKaprTwJQCYLWr1/vLHfPPfeo+sjnQoJWCayl5WrXrl01Vhcin2lEIei9996Tn9PlbmFhYc5yixcvVvu+/fZbt8cPHTpUa9OmjfP2K6+8osp99NFHzn0Wi0Xr27evFh0dreXk5Dj3S7kpU6Y4b99+++1ay5Yty9RRynj+E42KilLlvb2fAwcOqNtpaWma2WzWrrrqKs1qtTrLzZ49W5WbO3euc9+AAQPUvv/85z/OfUVFRVqTJk20UaNGVXgct2zZoh47fvx4t/2PPPKI2r98+XK39yn194XnMfnqq6/U8/3zn/90Kzd69GhNp9Npe/fuVbf/+OMPTa/XayNGjHB738Jmszmv5+fnl3nNu+++W4uMjNQKCwu91qM82dnZ6jPz8MMPu+2fMWOGqtuhQ4cqfN0hQ4a4fZYcfxPZvP19HVasWKH2y6XjPbZv3149p+f7bd26tXbllVc698XFxWkTJkyo8L0RBQpbRiikyS/SpUuXum2urQHyazIxMRHz5s1za0WQcvKL2OH7779HkyZNMG7cOOc++WX+t7/9DWfOnMGqVasC+K6AH374QbXmSDO8JI063HXXXYiNjcX//vc/t/LSSvCnP/3JeVvyOqSbY//+/RW+jrxv4dk6Id0pwvN1qkteR3Ji5Hh6vo7Ed46/mbRY2Ww21crh+r6Fa5eXtAC4dldIi5i03OTn5+P333+vUt3keEru0WeffebWZSSfmYsvvhgtWrQo93UdrXLSkiTHWW6fqy1btuCPP/7AzTffrFr4HK190sU1aNAgrF69Wh0f0aBBA9Wdc/z48XN+XaJzxQRWCmlywq0ogdVoNKpm7E8++UR1y0g3w/z581U3h2swIk3y7du3L3MCPO+885z3B5Lj9aQrw5UEGZLf4Fmf5s2bu52sheRa/Pbbb5W+jrxnz9FHEpjJya6m3rc8j3Q3xcTEVHh89+3bp+ojuRgV2bFjB5566inVpSHdGa6qExTIZ0ECobVr1+KSSy5R9di0aRNeeeUVt3KStyFdJ1JOAh/P15WuvXMhgYi4/fbbvZaR15G/reQESTnpKpJuqqFDh+K2227zmv9C5E8MRogqIX3+kmshv75lSK38ApacCsmvqAmeQYBDdZIpq8vbSJzykkOr8h5qI8mfkNYIadGQnBJJXpWEY0nclDwZR8tBVQwbNkwlispnQ4IRuZSg6MYbb3SWkQBFWifksyM5LxIESHAorT6SC1PR6/r6GXE8h+QseZtUTlrBxJgxY1Rr0IIFC1Ryrzxm+vTpKtiWlh6iQGIwQlQJSQZMTk5Wze6XXnqp+jU9adIktzKSlCitCHIycG0dcTT5y/3eyK9UzxEuorxWBV9P+o7X2717t9svXem6kWTLwYMH+/Q8vryOvGf5Re5opRCpqanqPVX0vqv6OtL1JF0qrq0jnsdXAgupj4wM8nYylpEn0oUhJ13XJFo5LtUVFRWlRtV8/vnnKtCQz4qc6KU1x+Hbb79VrWvffPONW9fNihUrKn1++YwIz8+J52dE3r+QQMuXv7F8riWBVjYZDSZJyP/6178YjFDAMWeEqBI
SXMhEY3IykVEgMpTVtYtGSBP3yZMn3XJLpNysWbPUL1H5Je6NnECk6dy1S+TEiRPqF2t5J73yAhdPciKSX93//ve/3Vo3ZMIxeS0Z1loT5H0Lz+4IOSGLmnwdaQWYPXu2235pUZAAzXHylJYr+XtJi4dnS4PjODhagVyPiwRpMqLkXMhnQvIv3nnnHWzdurXMZ6S815W/hQz3rYwjyJCcDwc5Hm+99ZZbOelukbIy5FlylTydOnXK+VjP7igZ2ivBk+uQbKJAYcsIhTTpeikvYVGa2l1bFOTEIoGF9PdfcMEFbq0A4v/+7/9UV44M5ZVcAZm74YsvvlA5AnKi9sx18OwGku4BGUYsCZqSSyBDLjt06FBmzgc52UgLgZzs5cQhs8f26dOnzHMmJSXhiSeeUMNaZSisDIuVVhI54V500UVuyarnQrqqJO9AToqO7g8ZPipzoUhgIPOW1ATpBpHnkhYpmWtDXle6FmS+EEnSdZysJXdFysicIdIyMXLkSJXnI8Nu5XjJkGP520pLg9RbjrcEMxJk+tolVVHAJH9nGSIrgYfkGrmSIdUSIMp7kSHZEiy8/fbbKgiQ4LMiMleMJMPK3zQjI0PNf/Ppp5+qgNeVBGISDElwJo+ROXJkzphjx46pFhhpMZGgWlqYJE9Igmw5lhIwy+dKjpMMOSYKuICN2yGqI0N7ZZP7XckwyZSUlHKHlzqkpqZqd955p5aYmKiG1V5wwQVlnqe8ob1iyZIlWpcuXdTjOnbsqIYIlze09/fff9cuu+wyLSIiQt3nGObrbeinDOXt1KmTZjKZtMaNG2v33nuvlpmZ6VZGhpCef/75Zerpy7BWUVxcrD3zzDNq6Ki8jhynJ554wm2I7LkO7RW5ubnaQw89pDVt2lS9jgxhfeGFF9yGsDrI0OXu3burIbfx8fHqPS5dutR5/5o1a7SLL75YHUd5vr///e/OYdyOYbJVOQYOt9xyi3qOwYMHl3v/N998o1144YVaeHi41qpVK2369Omqrp5/O8+hvWLfvn3qeeU9yd/yySefVO/Js87i119/1UaOHKk1bNhQlZf3MGbMGG3ZsmXOoduPPvqo1rVrVy0mJkb9XeT6a6+95vN7JapJOvlf4EMgIiIiIjvmjBAREVFQMRghIiKioGIwQkREREHFYISIiIiCisEIERERBRWDESIiIgqqOjHpmcykKDMbyoRCdWkNDCIiolCmaZqaZE8mHfRcSLTOBSMSiMiiUkRERFT3HDlyRM36W6eDEcdU2vJmZDpjIiIiqv1ycnJUY0JFS2LUmWDE0TUjgQiDESIiorqlshQLJrASERFRUDEYISIioqBiMEJERERBVSdyRoiI6OxQyZKSElit1mBXhQgGgwFGo/Gcp91gMEJEVEdYLBacOHEC+fn5wa4KkVNkZCSSk5NhNptRXQxGiIjqyOSPBw4cUL9EZQIp+eLnJJAU7FY6CZBPnTqlPpvt27evcGKzijAYISKqA+RLXwISmbNBfokS1QYREREwmUw4dOiQ+oyGh4dX63mYwEpEVIdU95cnUW3+TPJTTUREREHFYISIiIiCisEIEREFxR133IHhw4cHuxpUCzAYISKioHj11Vfx/vvvO29ffvnlePDBB2vkuefPn4+rrroKDRs2VKOOtmzZUiPPS/4R2sHIL28A3z0EnNod7JoQEYWcuLg4NGjQwC/PnZeXh0svvRTTp0/3y/NTzQrtYGTb58DGuUD6H8GuCRFRteZ5yLeUBHyT162KL774AhdccIEaBiotFYMHD1bBgms3jVxftWqVai2RlgzZDh48qO7bvn07rrnmGkRHR6Nx48a49dZbkZ6eXuFrSpnJkyer16LaL7TnGYlKtF/mV/yhJiKqjQqKreg8eXHAX3fns0MQafbt9CEzxo4bNw4zZszAiBEjkJubix9//LFMQCNByJ49e9ClSxc8++yzal9SUhKysrIwcOBAjB8/Hi+//DIKCgrw2GOPYcyYMVi+fLl
f3h8FXmgHI5GlwUgegxEiIn+QYETW0hk5ciRatmyp9kkrSXldNjKrrEzo1qRJE+f+2bNno3v37pg6dapz39y5c9XkbxK8dOjQIUDvhPwpxIORBPtlfkawa0JEVGURJoNqpQjG6/qqa9euGDRokApAhgwZopJKR48ejfj4eJ8ev3XrVqxYsUJ10Xjat28fNmzYgLvvvtu5b+HChejfv7/P9aPaIbSDEXbTEFEdJnkVvnaXBIuspbN06VL8/PPPWLJkCWbNmoVJkyZh3bp1Pj3+zJkzGDZsWLmJqLI4m0yR36dPH+e+Zs2a1Wj9KTBq96fY3yIb2i/zTwe7JkRE9Tpo6tevn9okqVS6axYsWFCmnHTTWK1Wt309evTAl19+iVatWqml6ssTExPjt7pTYIT2aBrmjBAR+ZW0gEi+x8aNG3H48GE1/4es8nreeeeVKSsBh5SXUTQyWkZaPSZMmICMjAyVBCtdMtI1s3jxYtx5551lAhdX8hiZW2Tnzp3q9u7du9XtkydP+vX9UvWEdjDi7KZhywgRkT/ExsZi9erVGDp0qEo2feqppzBz5kw1VNfTI488orp1OnfurEbSSPDStGlTrFmzRgUekm8iuScyMZrMT1LRAm3ffPONSny99tpr1e2bbrpJ3X7jjTf8+n6penRaVQeMB0FOTo7KtM7OzlYf7BqTsR/4d3fAFAlMOlFzz0tEVMMKCwtx4MABtG7dutrLtBMF+rPp6/k7tFtGHN00xfmAJT/YtSEiIgpJoR2MhMUAepP9OkfUEBER1Z1gZM6cOSrRSJpjZEjV+vXrvZaVRZAcU/s6tlrTxKjTMW+EiIiorgUj8+bNw8SJEzFlyhRs3rxZTWgjE9mkpaV5fYz0E8ksfI7t0KFDqH0jahiMEBER1Ylg5KWXXsJdd92lhlVJxrNkJsv0vTI9rzfSGiLT+zo2WeioIkVFRSrpxXXz/yysDEaIiIhqfTBisViwadMmt1UQZWiV3F67dm2FM+jJJDeylsANN9yAHTt2VPg606ZNU9m3jk0e5zechZWIiKjuBCMyCY2M9fZs2ZDb3iaS6dixo2o1+frrr/HRRx+pSWwuueQSHD161OvrPPHEE2oYkGM7cuQI/IYTnxEREdXv6eD79u2rNgcJRGTmvTfffBPPPfdcuY8JCwtTW0BwSngiIqK60zKSmJioZsdLTU112y+3XZd8rojJZFKz4O3duxe1QhSDESIiojoTjMgiRj179sSyZcuc+6TbRW67tn5URLp5tm3bplZbrBXYMkJEFBR33HEHhg8fHuxqUF0cTSPDet9++2188MEH2LVrF+69917k5eWp0TXitttuUzkfDs8++6xaNnr//v1qKPCf/vQnNbR3/PjxqBWYM0JEFBSvvvqqmovK4fLLL1frzpyr4uJiPPbYY2odm6ioKLW+jZybjh8/fs7PTbUkZ2Ts2LFqxUVZBlqSVrt164ZFixY5k1plYSPXxYsyMzPVUGApGx8fr1pWfv75ZzUsuFbgaBoioqCQ0ZL+kJ+fr378/uMf/1BzYcl56IEHHsD111+vVg+m2ie0F8oTZ9KAF9vLoQD+kQ4Y/J7TS0RUM4uRyde3rK0VaLK4qMxg7aMvvvgCzzzzjMoVlHmpJG9QRlhOmDABWVlZ+Oqrr1SXjbS4u5L3K7N9b9++HY8++ih+/PFH1dIhq/e+/PLLKo/RVxs2bEDv3r1Vy3yLFi2q9HbJ/wvl8cwbUTrpGTSgIBOITgpyhYiIfCSByNSmgX/dJ48D5iifisqs2+PGjcOMGTMwYsQI5ObmqqDC83ewdNns2bMHXbp0Ud37IikpSQUrAwcOVF37EoAUFBSoLpgxY8Zg+fLlPldZToYyAWeDBg2q+GYpEBiMSEtIeAOgMMuexMpghIioxkgwUlJSgpEjR6rJL4XkcniSX88ySEJaTlxHZ86ePVu1pEydOtW5T+aukskwJXjp0KGDT7/cJYCRoKjGW9epRjAYceSNqGCEeSN
EVIdId4m0UgTjdX0kORuDBg1SAYisYyZdLKNHj1Y5hL7YunUrVqxYgejo6DL37du3T3W/3H333c59CxcuRP/+/d2SWaUVRVpiXn/9dZ/rTYHFYMQxoub0Xo6oIaK6RfI2fOwuCRaZm2rp0qVq4IKMrJw1axYmTZqEdevW+fR4WU5k2LBhmD59epn7ZIoImV5CVo93aNasWZlARPJEpEuHrSK1F4MRwblGiIj8RnI1+vXrpzYZiSndNQsWLChTTrppZC4qVz169MCXX36pElmNxvJPWTExMWX2OQKRP/74Q7WsNGxY+j1P9WOekXqJs7ASEfmFtIBIvocMqZWpH+bPn6+mh5BlQTxJwCHlDx48qNZCk1YPGXGTkZGh8j2kS0a6ZhYvXqzmtvIMXFwDEekKktf8+OOPVTmZXkI2WfCVah8GI64tI+ymISKqUdI1snr1agwdOlQlmz711FOYOXMmrrnmmjJlH3nkEdWtI/NQyUgaCV5kwrI1a9aogELyTST3RCZGk1ExrnNauTp27Bi++eYbtSCrzIUl3TmOTbqLqPZhN43rLKxsGSEiqlHSAiITY5bHdfZVIcHK2rVry5Rr3769alHxlbSw1IEptMgFW0YEZ2ElIiIKGgYjggmsREREQcNgxC1nhMEIERFRoDEY8eymYT8jERFRQDEYcW0ZsVoAy5lg14aIiCikMBgRMoOhMcJ+ncN7iYiIAorBSJkk1oxg14SIiCikMBgpMwsrW0aIiIgCicGI58Rn7KYhIiIKKAYjDpxrhIgooO644w4MHz482NWgWoDBiANnYSUiCqhXX33VbUr4yy+/XK07UxOefvppdOrUCVFRUYiPj8fgwYPVInxUOzEYcYhMsF+yZYSIKCDi4uLUgnf+IOvczJ49G9u2bcNPP/2k1quRhfZkxWCqfRiMlMkZYTBCRHWDLAaXX5wf8K2qi9B98cUXarXdiIgINGzYULVS5OXluXXTyPVVq1ap1hKdTqe2gwcPqvu2b9+uVvmNjo5G48aNceuttyI9veJW7Jtvvlm9Tps2bXD++efjpZdeQk5ODn777bdzOOLkL1y114HdNERUxxSUFKDPJ30C/rrrbl6HSFOkT2VPnDiBcePGYcaMGRgxYgRyc3Px448/lgloJAjZs2cPunTpgmeffVbtS0pKQlZWFgYOHIjx48fj5ZdfRkFBAR577DGMGTMGy5cv96kOFosFb731lmqJ6dq1azXeMfkbgxEHJrASEdU4CUZKSkowcuRItGzZUu2TVhJPEiiYzWZERkaiSZMmzv3S1dK9e3dMnTrVuW/u3LlISUlRwYt0x3jz3Xff4aabbkJ+fj6Sk5OxdOlSJCaW/vCkWoXBiAO7aYiojokwRqhWimC8rq+kJWLQoEEqABkyZIjK2xg9erRKKvXF1q1bsWLFCtVF42nfvn3YsGED7r77bue+hQsXon///ur6FVdcgS1btqgunbffflu1pkgSa6NGjXyuPwUGgxHPlpGibKDEAhjNwa4REVGFJK/C1+6SYDEYDKpF4ueff8aSJUswa9YsTJo0yeeRLWfOnMGwYcMwffr0MvdJa4fNZkOfPme7qpo1a+a8LiNp2rVrp7aLL74Y7du3x7vvvosnnniiht4d1RQGIw4R8YBOD2g2oCADiDnbTEhEROcWNPXr109tkydPVt01CxYsKFNOummsVqvbvh49euDLL79Uo2GMxvJPWTExMT7VQwKXoqKiar4L8ieOpnHQ64GI0uG9nIWViKhGSAuI5Hts3LgRhw8fxvz589Xw2vPOO69MWQk4pLyMopGuFQkeJkyYgIyMDJUEK10y0jWzePFi3HnnnWUCFwcZqfPkk0/il19+waFDh7Bp0yb8+c9/xrFjx3DjjTcG4F1TVTEYccUkViKiGhUbG4vVq1dj6NChKtn0qaeewsyZM9VQXU+PPPKI6tbp3LmzGkkjwUvTpk2xZs0aFXhIvonknsjEaDI/iV5+RJZDnuP333/HqFG
j1GtKN8/p06fVKB4Z5ku1j06r6oDxIJCx4ZJpnZ2drT7YfvPeUODQGmD0XKDLKP+9DhFRFRUWFuLAgQNo3bo1wsPDg10dIp8+m76ev9kyUl7LCEfUEBERBQyDEVfspiEiIgo4BiOuOAsrERFRwDEYccWWESIiooBjMFLuLKxsGSEiIgoUBiOuotgyQkREFGgMRlyxm4aIiCjgGIyU100jwUjtn36FiIioXmAwUl7LiK0EKMwOdm2IiIhCAoMRV6ZwwFy6TDW7aoiI/OqOO+7A8OHDg10NqgUYjHidhZUjaoiI/OnVV1/F+++/77x9+eWXq3Vnato999yjVg5+5ZVXavy5qWaUvx5zqAcjWYfYMkJE5GeyZom/LViwQK3eKwvuUe3FlhFPnIWViOoIWefUlp8f8K2q66t+8cUXarXdiIgINGzYEIMHD0ZeXp5bN41cX7VqlWotkVYM2Q4ePKju2759u1rlNzo6Go0bN8att96K9PTKv6OPHTuGv/71r/j4449hMpmqeZQpENgy4okTnxFRHaEVFGB3j54Bf92OmzdBFxnpU9kTJ05g3LhxmDFjBkaMGIHc3Fz8+OOPZQIaCUL27NmDLl264Nlnn1X7kpKSkJWVhYEDB2L8+PF4+eWXUVBQgMceewxjxozB8uXLvb6uzWZTQcujjz6K888//xzfMfkbgxFPkQn2S3bTEBGdMwlGSkpKMHLkSLRs2VLtk1aS8rpszGYzIiMj0aRJE+f+2bNno3v37pg6dapz39y5c5GSkqKClw4dOpT7utOnT4fRaMTf/vY3v7wvqlkMRrx20zAYIaLaTRcRoVopgvG6vuratSsGDRqkApAhQ4bgqquuwujRoxEfH+/T47du3YoVK1aoLhpP+/btw4YNG3D33Xc79y1cuFAFNNLSsnnzZtXdQ7UfgxFPnIWViOoIlVvhY3dJsBgMBixduhQ///wzlixZglmzZmHSpElYt26dT48/c+YMhg0bplo6PCUnJ6vumD59+jj3NWvWDG+++SbS0tLQokUL536r1YqHH35Yjahx5KJQ7cFgxBNzRoiIajxo6tevn9omT56sumtklIsn6aaRoMFVjx498OWXX6JVq1aq26U8MTExbrclV0SSZF1Jq4zsv/POO2vkPVHN4mgaTxxNQ0RUY6QFRPI9Nm7ciMOHD2P+/Pk4deoUzjvvvDJlJeCQ8tJyIaNlpNVjwoQJyMjIUEmw0iUjXTOLFy9WQYVn4OIgI3YkEdZ1k9E0kovSsWPHALxrqioGI167aTKCXRMiojovNjYWq1evxtChQ1Wy6VNPPYWZM2eqobqeHnnkEdWt07lzZzWSRoIXmR9kzZo1KvCQfBPJPZGJ0Ro0aAC9nqew+kKnVXXAeBDk5OSoTOvs7Gz1wfargixguj3jG5NS7VPEExEFWWFhIQ4cOIDWrVsjPJzfS1Q3Ppu+nr8ZVnoKjwP0pf2STGIlIiLyOwYjnmQYmLOrhnkjRERE/hbSwUhafhp2nd6FXEuu+x0cUUNERBQwIR2MTFg2AWO+G4Otp7Z6mYWVSaxERET+FtLBSIOwBuoyqyjL/Q4O7yUiIgoYBiMSjBR6BCOchZWIiChgQjoYiQuLK79lhDkjREREARPSwUh8uH2hJnbTEBER1bFgZM6cOWraXpncRBYoWr9+vU+P+/TTT9UaBcOHD0etzhlhAisREVHtDUbmzZuHiRMnYsqUKWp5ZlkeWhYgkhUSKyJrDchUv/3790dtwW4aIqLgueOOO2rNj1OqY8HISy+9hLvuukstUiTrB7zxxhuIjIzE3LlzvT5G1hS45ZZb8Mwzz6BNmzaoLRwtI9lF2e53sJuGiMjvXn31Vbz//vvO25dffrlad6amAh1piXfdrr766hp5bgpyMGKxWLBp0ya3pZlloSK5vXbtWq+Pe/bZZ9GoUSP85S9/8el1ioqK1Hz2rps/xIfZc0YyCzPLH01TkAnYyl8VkoiIzo2sWSIL3vmLBB8nTpx
wbv/973/99loUwGBElnSWVo7GjRu77ZfbJ0+eLPcxP/30E9599128/fbbPr/OtGnT1IfUsaWkpMCf3TRlWkYcwYhmsy+cR0RUC8k6p8VF1oBvVV1f9YsvvlCr7UZERKBhw4bqB2xeXp5bN41cX7VqlWotcbRkSPe+2L59u1rlNzo6Wp1vbr31VnU+qkxYWBiaNGni3OLj7T9AqfYpXRHOP3Jzc9WHRgKRxMTSrg8fPPHEEyovxUFaRvwRkDi6aQqthSgsKUS4sXS1QYMJkEBFghSZaySqNDghIqpFSiw2vPXAqoC/7v+9OgCmMINPZaVFYty4cZgxYwZGjBihzgs//vhjmYBGgpA9e/agS5cuqjVdJCUlISsrCwMHDsT48ePx8ssvo6CgAI899hjGjBmD5cuXV/jaK1euVK3yEoTIc/zzn/9UwRDV8WBEAgqDwYDU1FS3/XJbok5P+/btU5HtsGHDnPtsNpv9hY1G7N69G23bti03mpXN36JMUTDqjSixlagk1iZGl/cgAYgKRiT67uD3uhAR1UcSjJSUlGDkyJFo2bKl2ietJJ6kFdxsNqscRNfzyezZs9G9e3dMnTrVuU9yFOUHqgQvHTp08NpFI68py9rLuejJJ59UrSuSUiDnMarDwYh8UHr27Illy5Y5m9YkuJDb999/f5nynTp1wrZt29z2PfXUUyoylijYX90vvpJmQGkdSS9ItwcjUU3cR9Rk7OeIGiKqtYxmvWqlCMbr+kpGXA4aNEgFIDLy8qqrrsLo0aN97jLZunUrVqxYobpoPEmQsWHDBtx9993OfQsXLlSjNm+66SbnPnntCy+8UP34ldYSqQ/V8W4a6T65/fbb0atXL/Tu3RuvvPKK6vuT0TXitttuQ7NmzVTeh8xDIk1urhzJSp77g8U1GHHDKeGJqJaTH1S+dpcEi7RCLF26FD///DOWLFmCWbNmYdKkSVi3bp1Pjz9z5oxqXZ8+fXqZ+5KTk9UPYpnvykHOP+WRkZzSur93714GI/UhGBk7dixOnTqFyZMnq6TVbt26YdGiRc6k1sOHD6sRNnV/sTxHMMKWESKicw2a+vXrpzY5d0h3zYIFC8ptfZdBEq569OiBL7/8Uk20Kd375YmJiam0DkePHsXp06dVAEP1JIFVumTK65YR0gRWEdcx5XVjsTzOwkpEVF3SAiJd+dI9I8mkclt+0J533nn47bff3MpKwCH3S66hdMskJCRgwoQJahCEJMH+/e9/V/ukdUNm9H7nnXfKzf+Q1hSZ12rUqFEq/0S6c+Sx7dq1U11FVPvUnSYMP+EsrERE/hMbG4vVq1dj6NChKtlU8gZnzpypkkk9ySzdElzIhJoykkZa2ps2bYo1a9aoFhMJaCT/QyZGky5/b63w8hwS6Fx//fXqNWWOK8l3lFE8gRgcQbVsaG9dwFlYiYj8R1pApCvfl5ZyCRzKm0Czffv2mD9/vs+vKfOZLF68uBq1pWAJ+ZYRx8q9mUVeZmFlAisREZFfhXwwUnk3DYMRIiIifwr5YMTZTVPoOSV8gv2SLSNERER+xWDE69De0paRkgLAkheEmhEREYUGBiPeghFzNGAozbrmiBoiIiK/YTBSGoycKT6DYlvx2Tt0OiaxEhERBUDIByMx5hjooPMyvJfBCBERkb+FfDBi0BvOjqgpMwurY64RBiNERET+EvLBSIV5I45uGuaMEBER+Q2DEZe5RjgLKxER1RcHDx5UixRu2bIFtR2DEZmFNYyzsBIR+Yus8P7Xv/4Vbdq0UWvDpKSkYNiwYWoBPSIR8mvTVDwLq6ObhsEIEVF1f53369dPLWz3wgsvqIXuiouL1doxsiLv77//HuwqUi3AlhEulkdEdVxeXp7XrbCw0OeyBQUFlZatqvvuu091Faxfvx6jRo1Si+Gdf/75mDhxIn755RdnOVmh94YbbkB0dLRa6XfMmDFITU113v/000+jW7du+PDDD9GqVSvExcXhpptuQm5urrPMF19
8oYIdWSivYcOGGDx4sFud33nnHbVwX3h4ODp16oTXXnutTJeGLMh3xRVXIDIyEl27dnVbuO/QoUOqRSc+Ph5RUVHqfXz//ffORf8k4HL11Vdfqed02Lp1q3rumJgY9R5lJeGNGzeWe9xuvvlmjB071m2fBHGJiYn4z3/+o27LAoSXXnqpel15v9dddx327dvn9W/hSx3F119/jR49eqjjJK1ZzzzzDEpKSuBPDEYkGAmvJIGV3TREVIvJCdzbJgGAq0aNGnkte80117iVlZO+Z5mqyMjIUCdMaQGRk7cnx4nRZrOpQETKr1q1CkuXLsX+/fvLnIzlRCsnz++++05tUvb5559X9504cQLjxo3Dn//8Z+zatQsrV67EyJEjoWmauv/jjz/G5MmT8a9//UvdP3XqVPzjH//ABx984PYakyZNwiOPPKLyLCRwkud0nIjlfRQVFWH16tXYtm0bpk+fXqVjcsstt6B58+bYsGEDNm3ahMcffxwmk8lr2W+//RZnzpxx7pPWpPz8fIwYMULdlkBLgjoJaKTLS6/Xq/vkeFbXjz/+iNtuuw0PPPAAdu7ciTfffFMFMXLc/EqrA7Kzs+XTpC794fPdn2td3u+i3f/D/e53pO7StCmxmjathV9el4jIVwUFBdrOnTvVpSf5fvS2DR061K1sZGSk17IDBgxwK5uYmFimTFWsW7dOPWb+/PkVlluyZIlmMBi0w4cPO/ft2LFDPXb9+vXq9pQpU1Tdc3JynGUeffRRrU+fPur6pk2bVPmDBw+W+xpt27bVPvnkE7d9zz33nNa3b191/cCBA+rx77zzTpk67Nq1S92+4IILtKeffrrc53/vvfe0uLg4t30LFixwO2YxMTHa+++/r/miuLhYHf///Oc/zn3jxo3Txo4d6/Uxp06dUq+3bds2t/f066+/+lzHQYMGaVOnTnUr8+GHH2rJycnV+mz6ev5mzogvQ3tl/hFrCWDg4SKi2sf117Mng8HgdjstLc1rWfll7Uq6Ls6Fo1WiMtJSIUmtsjl07txZtZzIfRdddJGzpUa6OBySk5Od70e6VAYNGqS6aYYMGYKrrroKo0ePVl0q0oIgrSp/+ctfcNdddzkfLy0e0t3j6sILL3R7fiGvId06f/vb33DvvfdiyZIlqgtIWp1cy1dGWjHGjx+vuprk8TfeeCPatm1bblmj0ai6qqRF59Zbb1XvQbpPPv30U2eZP/74Q7X2rFu3Dunp6c4WEeny6tKlC6pDupLWrFnj1hJitVpVd5+0ykj3lT+wm6bCBFZZube0L60gIwg1IyKqnHSBeNuk39/XspJrUVnZqmjfvr3KR6ipJFXPLg15bscJWIIu6d5ZuHChCmRmzZqFjh074sCBA85g7e2331bdL45t+/btbnkrnq/hyKVwvIYEEtJ9JMGBdNP06tVLvY4jkPMMviTHw5XkvezYsQPXXnstli9fruq5YMECr+9Xumqk+0WCIemekr/P1Vdf7bxf8leka0velwQksgmLxVLu8/lSRzlWkiPiepzkvUrg4/lZqkkMRlyG9pYJRvQGIMJ+Hyc+IyKqmoSEBNVKMWfOnHKTX7Oy7N+5klR65MgRtTlIvoLcLydsX0nwICN35GT666+/wmw2q5N948aN0bRpUxVItGvXzm1r3bp1ld6TtN7cc889KtH14YcfVoGASEpKUsm0ru+zvPk9JA/loYceUq0rktPy3nvveX2tSy65RL3evHnzVAuJtKQ4gqXTp09j9+7deOqpp1SLkBzDzEyP6Sk8+FJHSVyV5/U8TrJ5tpzVJPY7uCSw5lhyYNNs0Ov07l010irCJFYioiqTQEQChN69e+PZZ59V3RrSPSKtGK+//rrqhpEuC+lekZaAV155Rd0vo3AGDBigWh98Ia0C0oog3TOSpCu3T506pU7SQgIU6WaRbhlpXZBEVEn8lBO4dJ/44sEHH1RJvhJQyONWrFjhfP4+ffqoLownn3xSvY68viR+OshIpUc
ffVR1HUkAdPToUZXI6plgXN6omjfeeAN79uxRr+cg3U8yguatt95S3UnSNSMJsRWprI5Cun1kVE6LFi1UXSUAka4baUX65z//Cb/R6gB/J7BaSiwqgVW2rMIs9zvnDrUnsW7+yC+vTUTki4qSBGu748ePaxMmTNBatmypmc1mrVmzZtr111+vrVixwlnm0KFDal9UVJRK9Lzxxhu1kydPOu+XBNauXbu6Pe/LL7+snlPIsRkyZIiWlJSkhYWFaR06dNBmzZrlVv7jjz/WunXrpuoQHx+vXXbZZc7kWs9kT5GZman2Oep5//33q0RYeX55nVtvvVVLT093SwZt166dFhERoV133XXaW2+95UwOLSoq0m666SYtJSVFvX7Tpk3V81X299y5c6d6DnmfNpvN7b6lS5dq5513nqrPhRdeqK1cuVKVlXp4e08V1dFh0aJF2iWXXKLKxMbGar1791bl/JnAqpP/oZbLyclR0Wx2drYam+0PF39yMfKK8/Dt8G/RKq7V2TuWTgHWvAJ0uwUYfnZMOhFRIEkCoeQ/yK9qf/bdE9XkZ9PX8zdzRiobUdO6v/3ywI9BqBUREVH9x2CksllYUy4G9EYg+zCQeW7D3IiIiKgsBiOVtYyERQPNetqvs3WEiIioxjEYqWxKeNGqtKvmIIMRIiKimsZgpLKWEdH6srMtI7U/35eI6rFzWXeEqLZ+JjnPSGWzsIqU3oDBDOQeB07vAxLbBb6CRBTSZAIvmfPh+PHjavIque252ipRIMlgXJntVeZzkc+mfCari8GI5yyssg6NJ1ME0Lw3cOgn4OBqBiNEFHDyZS9DJ2V1WglIiGoLmUhNJkk7lxlaGYz40k3jGOIrwYh01fT6c2ArR0RU2joiX/oyQ6ksXkYUbLImkCzqd66tdAxGfOmmcSaxTgMO/mTPG2HzKBEFgXzpy/oknovGEdVlTGD1tWWkeS/AGA7kpQGndge2ckRERPUYg5FS8eFnV+4td4Z8YxiQ0sd+nUN8iYiIagyDEY9umhJbCfJL8ssv5JwafnUAa0ZERFS/MRgpFWGMQJghrJK8kcvOtoxwrD8REVGNYDBSXt5IecN7RbMegCkKKMgE0nYEtnJERET1FIORqiSxGkxAy77261ynhoiIqEYwGKlKMCK4Tg0REVGNYjBSlblGXJNYD64BbJx0iIiI6FwxGPEyvNerJl2BsFigKBs4+VvgKkdERFRPMRgpr2XEWwKrMBiBlpfYrzNvhIiI6JwxGKlqzohr3gjnGyEiIjpnDEaqE4w48kYOrwWsxQGoGRERUf3FYKScYCRb8kEq0vgCILwBYDkDHN8SmMoRERHVUwxGqtMyotcDrS61Xz/IrhoiIqJzwWCkOsGIaF06NTyTWImIiM4JgxEXDaTrBUBBSQGKrEW+JbEeWQeUWAJQOyIiovqJwYiLaFM0jDpj5cN7RaPzgMhEoDgfOLYpMBUkIiKqhxiMuNDpdIiVCc186arR6VzyRthVQ0REVF0MRjzEh/kwC6vnEF/ON0JERFRtDEaqsz6NQ6vSJNYj64HiQj/XjIiIqH5iMOJtRE1lOSMisT0Q3QSQZNej6/1fOSIionqIwUh1FstzzRtxdtUwb4SIiKg6GIycSzeN6xBfJrESERFVC4OR6k4J7+BoGTm6EbDk+7FmRERE9RODES/BSGZRpm8PiG8NxDYHbMXAvmX+rRwREVE9xGDkXFtGJG+ky0j79aWTOaqGiIioihiMeJkS3uecEXHZo0BMMpCxH/jpJf9VjoiIqB6qVjAyZ84ctGrVCuHh4ejTpw/Wr/c+rHX+/Pno1asXGjRogKioKHTr1g0ffvghan0Cqy9Dex3CY4Grn7df/+llIP0PP9WOiIio/qlyMDJv3jxMnDgRU6ZMwebNm9G1a1cMGTIEaWlp5ZZPSEjApEmTsHbtWvz222+488471bZ48WLU5hlYc4tzUWIrQbHFiqw0HxJTO98AtLsSsFqA/00ENM3/lSU
iIqoHdJpWtbOmtIRcdNFFmD17trpts9mQkpKCv/71r3j88cd9eo4ePXrg2muvxXPPPedT+ZycHMTFxSE7Oxuxsfa1Y/zFarOi+4fdoUHDyjErsemTk/hjYyqGjO+Cdj0bVfzgjAPAaxcDJYXAyLeBC8f4ta5ERES1ma/n7yq1jFgsFmzatAmDBw8++wR6vbotLR+Vkbhn2bJl2L17Ny67rHQq9XIUFRWpN+C6BYpBb0CMOUZdzyzIxMFt6YAGrJ63B4V5xRU/OKG1PX9ELH4SKPBxRA4REVEIq1Iwkp6eDqvVisaNG7vtl9snT570+jiJiKKjo2E2m1WLyKxZs3DllVd6LT9t2jQVSTk2aXkJxoia44czUFxoVdcLcixYO39v5Q++5G9AYkcg7xSw7Fl/V5WIiKjOC8hompiYGGzZsgUbNmzAv/71L5VzsnLlSq/ln3jiCRXAOLYjR44gGCNq0vbnqsuYhuHqcueaEzi2p5LWDqMZuO5l+/WN7wFHNvi5tkRERCEUjCQmJsJgMCA1NdVtv9xu0qSJ9xfR69GuXTs1kubhhx/G6NGjVeuHN2FhYapvyXULylwjh+zdMp37JeP8/k3V9ZUf70ZJsb21xKtW/YBut0jHFPDdQ4C1xP+VJiIiCoVgRLpZevbsqfI+HCSBVW737dvX5+eRx0heSG2lghENKDxqPzzJbRug74i2iIw1Iys1H5sWHar8Sa58FoiIB1K3Aeve8H+liYiIQqWbRrpY3n77bXzwwQfYtWsX7r33XuTl5anhuuK2225T3SwO0gKydOlS7N+/X5WfOXOmmmfkT3/6E2pzMBJTlADkGaHX69CodSzCIk3oP7aDun/zokPIOJ5X8ZNEJdoDErFiKpB9NAA1JyIiqnuMVX3A2LFjcerUKUyePFklrUrXy6JFi5xJrYcPH1bdMg4SqNx33304evQoIiIi0KlTJ3z00UfqeWpzMNIkt426ntQyBiazQV1v2yMJrS5MxMHf0rHy498x4uEe0Ol13p+o25+AXz8GjvwCLHwMuOnjQL0FIiKi+jvPSDAEcp4R8dnuz7Dyk904P7Uful3ZAv1GtXPel5tRiP8+sw7FRVYMuLkjulzWrOInS90BvHkZYCsBxn0KdLzG7/UnIiKqt/OMhAppGUnOsbeMJLe1Tw/vEJMQjj432O+Tob55WZXkvjQ+H+g7wX79+0eBIvsIHSIiIrJjMFKOGFsDJBQklxuMiAsub45GrWJhKbTix8/2VP6EAx4D4lKA7CPAB9cDZ075o9pERER1EoORcthOhqnLnMhTiIgxl7lfklqv+FMndblv8ykc2FpJcGGOAm78AIhIAI5vBt4dDKT7MIEaERFRCGAwUo6CI/ak1GPRe2HTbOWWSWwerfJJxOpP98BSWMlcIs17An9ZCjRoCWQeBN69EjjifbVjIiKiUMFgpBxZh+x5ICdi9iHX4j3H46JrWyE2KQJnMouw7uv9lT9xYjtg/A9A0+5AQQbwwTBg17c1WXUiIqI6h8GIhxKLFacOnVHXT8buR3ZRtteyRrMBl9/cUV3/beVRHPk9o/IXiG4E3PE/oMPV9tV9590KrHur5t4AERFRHcNgxEPaoVzYrBoKzWeQE3YamUUVr0WTcl4CzrskWc3YuuiNbUg/ag9kKs0hGfsx0FMmitOAhY8CS/4hU9PW3BshIiKqIxiMeDixL0td5iakAjpU2DLicNm4DmjavoEaXfPdrC1qLpJKGYz2BfUGTbbf/vnfwPzxQEntnSafiIjIHxiMeDix1x58WBrZL7OK7MFJRYwmA6655wIkNI1CXrYF3/57Cwrz7IvsVUinA/o/DIx4C9CbgO1fAh+OADJ8yD8hIiKqJxiMuNBsGk7sswch+qb21o3Mwoq7aRzCo0y47v6uiGoQhsyT+fj+9d8qX93XoetY4E9fAGGxwKE1wKxewFf3Aaf3Vf/NEBER1REMRlxknMiDpaAExjADIhvbD40v3TSus7MO+2t
XmCOMqoXlh7k7YbP5ONt+m8vtQ3/bXQloVmDLx8Dsi4AF9zAoISKieo3BiIsTe+1dMk1axyI+It7nbhpXDZtFY+g9F0Bv1GHfr6fw02d/wOflfxp1sreQjF8OtL/KHpRs/S8wuxcw//+A9D+q/qaIiIhqOQYjLo6X5oskt2uAuLC4agUjolnHeAy+o7O6vm3lUfy65HDVnkAmSLvlc+Cu5fYhwDLx2m/zgDm9gS/HA6d8mIKeiIiojmAwUs5ImuR2cWqxvOoGI6J9r8a49Mb26vraBfuwe93Jqj9Js57AzfOAu1YAHa6xByXbPgfmXGRfCXjFVODYZg4JJiKiOs0Y7ArUFjIc90xGEXR6HRq3isWRjHMLRkTXQSnIzSzE1h+OYPkHuxAZY0ZK54SqP1GzHsDNnwLHtwCrZgC7vwdObLVvq6YD0Y3t3TrSiiK5J2HR1a4zERFRoDEY8WgVSUqJhjnc6GwZyS70PYG1PP1GtkNeVhH2bkzDwje3YeBt56FtjyToZFhvVTXtBoz7BDiTBvyxFNizCNi3HDiTCvz6oX0zmIFW/e3BiQQxjTozOCEiolqNwYjH/CKSLyIcwYjMwCoJqNUKHmQqEb0Og2/vjIJcC47tzsLit7ejdddEDBjXUQ0DrhaZUr77LfZNJkk79DOwZzGwZ6F9Eb59y+ybvQZAQhugyQXuW0yyfZ4TIiKiIGMwUiYYiXMLRoptxSgoKUCkKbLaz20w6THs/m7YuPAgNi86hANb03FsTxb6jWqH8/olVzvQUYxhQNsr7NvV04D0PfYWkwOrgZPbgTMngYx99m3nV2cfF5EAND4fiG8FNGhh3+JSgAYpQExT+wyxREREAcAzDoCi/GKcPm5fUya5rT0IiTBGwKw3w2KzqLyRcwlGHAFJn+vboG2PRljx4S61Bs6Kj37Hng0ncfktndCg0bk9vyJBTVJH+9bvAfu+M6eA1G3ASdm22y8lYJFVgw/+aN/KPI8BiG1mD0zimttbYqKSXLZE+2VkImAKP/d6ExFRSGMwIqvz7s9R69XFNYpAZKxZ7ZPWCmkdSStIU101TaOb1shrJTaPxqjHeuG35Uew7uv9quvm0+fWo/ew1ug2KAV6Qw0PcIpOAqIHAm0Hnt1XXAic2gWk/Q5kHwGyDgFZR0qvHwFsxUD2YftWGXMMENUQCG8ARDQAwuM8Npd95mj7IoFhMfZLuS1Bnp6DuoiIQhmDEZfJzhz5Ig5x4XEqGDnXJFZPer0O3Qa3ULkjKz7ajWO7M7F2/j6V5Drwtk5IbB7j9bGSv5JnsaLAYkVhsRUFxe7X5bKw2IaiEitk8leZb02T/9SlegJ1qWkyqVtf6M19oW+igyFZp+plgIYoy2lEFR5HdMFxRBaeRHhRBsIsGQgrOg1zUQZMhadhLDwNvQQtllz7Vm26s4GJXEpwYo4ETBH26+rS9XqkvWvKGFF6GW5vnZFLt/1hgEEuzR6XYYDecA71JSKimsZgRI2kKc0XaWvPF3GID6veLKy+ikuKxA0PdsOun0/g5y/34tThXMz71wZEN4uCISUSZ5LMSLNakZpbiLScQqTmFCE1pxBFJYGaV0Rag7y1CGmIRT4a6nKQgBzE6vIRizx1GVd66bytk8sCRKMAUbpCRMK+6e1hEWA5Y98CxKYzwKY3Q9Ob1GYz2C812WcwAY5Ldd310qgudaX36WTkksEInd5Yus8MndxWmwl6VcZ+XcpAbQb3S53rbc99eo/75T7Zp7ffVve5Xjr2Ox7nuC4bk5WJqPaqU8FIXl4eDIayv2plX3h4uFs5b/R6PSIiIpy3c7JycfiPVFiLbYhLNrk9NgpR6lK6aUR+fr7Xqd2lWycy8mzeh69lj2YWYF1JLja2B0zb8tC6SI+iAwXAgdLnMdiQEWnGfpMVp/UabDJ6Ro3uAcKNBoSb9YgwGRBu1CPMZEBMdDQizAaYDXpYiy1y6pW2B1Vep67BedsUHqHmS7NqGixFhSixWtVaOjZ
Nc+632jSU2DRoBjOsNqDEZoPFUoTi4hIUWyOQYQtHmjUJJVYpZ0OxVYPOFKZaWAw2KwyWQhitFhhsJdBrGgyaTV3qNSsitSLEGKyI0RUhUitEeEk+DFoRwjWL2sy6YoRrxQjTitX1aIMNESiGGSXQ2SzQWYthglXdZ9RsMKEERpSofVEGK8L1Nhhhha2kBCWOP4XzTyIBXZHawg2AQW8/NsU2DRbH+obl/PnCDICxvLKouKwcw6IKypoNgKkaZeXvU1hBWZNeyuvUp0Bi2AKrDhrsm3wSbCpIsd826vUwGeQvp0OJfPbkf6Vl7I+B87rBoINR/VvUwaoBBSUaNHWfHDaduq4OoU6vWtzMRr2zbGGJDVppKCrVcC2v1+mdzyv3y/M66ihzNNrDcCkPGHR6mIxG9RqyL790YUpHXc/O6ahXo9pMEkyWvk5hcYnzGKhLtVtfel1DmMnkrIO0NpYWUJ8J++Psu3TQw2ySr1H7c9if1/HRcQSAjn938m/UqO6Tf4tFxVZY3T5kZ8vr3OogZUtcvk9Kj63L9XCzlLWTsuo4yUHy/BjrdKVl7fdZSkrUZ8jJ4zPvLKuTsvbvB9XMWk5wK/V1JOPL81b0m0mOg0793YBiVQfvhc0mM/SlzyvfUbJ5chwLs8lUQVnH37n0eY1GdT5wL2v/65Spg1tZW7l1cJDPpHw25Rh5q69b2dLntcr3Z0mJx/s6+weRfxf2fxueZcvW17WsTb6zPZ7XW9keo4ah5fkXeq+vyQSz2ex83oKCAp/K1rtgpGnT8n+lDx06FP/73/+ctxs1aqSCgfIMGDAAK1eudN5u3bY1MjJOq+sPz/V4vU5NkfB4gnOxvM6dO+PQoUPlPq/ct2PHDuftiy66CDt37iy3bKOmzXHXnO/x4x/pOHQ6Hyc+eAiWk+WvOxMdHofnb5+PSwtNCI8Pw6sLH8K2XRvKLSsBjmswde211+L777+HN44vN7m8cdQofLlggdeyex97HOESrBTkY+Lixfhi716vZX9q3wEJpf/Anks9if9meW9ZWtqmDZqZ7B/YF9LS8F5mhteyX7dqjfZh9uHQs9NP4bXT9r9beea1aIkLIuzzq7ybcRozT53yWvb9lBT0jrQHnp9kZuKfaaley85OaY3+sQmw6Q34OjMdTx/d77Xsc+164orE5tB0Biw/fRxT9vzitewjnQbgyqbnqRPrutOHMWXLd17L/l/nIRjaurc6gW87fQiT137gtewtXa7DsI4D1cl/X+ZhPLX8Ja9lh3cZhhEXDFdfokezjmPS9095LTuk83W4sdetqr7pZ07hiS/v81r2svOuxbhL7lUtNDkF2Xjs43Fey/bpcBVuveIxdb2ouAAPz73Oa9nubS7DX658wnn7/jcHeS17fos+uPeaqc7bE9+9FpYS+8rcntold8WD1589To9/MBJnvHTVtkjqiL+PfM15e/LHNyND5v0pR5P4lnhqzNkvmX9+9meczCz/+yQhujGeveUT5+0Z8+/D4VO7K/yOcHjlm4nYKxMilsNsDMdLfzn7Xfn6wiex4/A6eDP7bscUAcC7S5/Br/tXey0788/fIUy6UgF8uGI61u1Z4rXstNu+RIzkmMm/1R9fxY87v/Fa9pmbP0bDmCbq+oK1b2LZb595LTvpxneRnNBKXf/fxg+wcNN/vJZ9dMQctJT1wAD8sGUevlr3lteyfxs2Ex1knicAq7Z/hc/XzPJa9p6r/4UuLS9W13/ZvQgfrZzpteyfB09Gj7YD1PXN+1Zh7g/TvZb90+WP4uKOV6rr2w/9gjcWTfJa9sZ+f8WALsPV9T3Ht+Df3z7stezwPv+Hwd3Gquvff/YZHp15ideyU6ZMwdNPP62u79q1C126dPFa9pFHHsELL7yAehmM+INNfqp5YSjNLajpbprTZyz46Bd7cqj8co4KM0DaMMpjCjeg5QUNcWRXBgozi1CQ5a2kzBavIe1QDuIaRSIsovI/7f6RI2HNzII1IwO5+72
fVEXW/PmIdPwySEur+InlV46PSan5kSbkRJhg0+lRkGsCMiuob7M2sMQmQNMbkWrbAVQQjOzoMBj5DVOg6Yw4/Psa4NRCr2W3nn8rCpK7qK6NP3YtBdLe8Vp2+/njgdIvmr27FwFHvf9jO9DqWsSXftEc2rcKqCAYOdmkN/5of7W6fsIk5bwHI5nxHXC0+eXqerpuCyqSF9UU6Yld1fVs29lfz+UpCotHbqz9izy/uOLFHUuMkSiMSFTXLaoFzjsJxqySyyMfDenaCgaVNOXa5OX9/al2JJvrL74KjoVmhd6aa39+xfsvfJ1mhaHE8QHX1G3vbDAUp5/9za8VV1AHG4yWswGQTqvo76HBZDlxtg628gMyB1PRsbPPa/X+K1iYi47BbLP/WNBb8yopexRmXY6qg0GOX4V1OAqzyV5PgzW70rJhhfZjZnQeay9lLccQVmj/PBpLMiqp7wmEFcTaH1d8upLnlbL2ldaNllOVlD3pLGuyVLxkiNGShrAC+4/As39DL89bfMpZVv4uFT5vcTrCCkp/DDe1v8dg0Gk+LykbPDk5OYiLi8Px48cRGxtbo900X778Cw5tP42+w9viwoEpbmX/+/t/8er2V3FN62sw47IZ1eqm2XE8G//63y78evhsQNM6KQqXd26O/u2TcHHbhjDYilWTlzdRUVGwFJaoev6+/giO7clEfm75XziOXyZmQwk0Sy50ljzoC8/AYMmHwVoEQ4l0mxTCYLUgSmeTD4D6Qii2FqNEr8FqNsBi1sNi0qHIpKHQqKFIYjKzEcUGHUr0ehTBhmJpctdL7oVe5WBAJ8GPURrwYTZFQA+Tum6Vk5pND4NmgkEzwGAzwKAZYbCZoLcZEWaIhFkzqybsEmsxrDbvzYkmY5hqxheVljWYoS8NJqtS1mqV5mXvX/xGg1kFqdJ8atWKYUUxoNfsPQKl6RmqNVgvObMmGI0Gdd0Gq3peZwqHM5VDp7oQjNKkKc3cUlazqbLSvWEvZ08uluc1GPQwmcwwmaTZWE6bGqwlFuj09u4Q1R2nt3dLyG1pKg0zy/vTw6ZZUVwsZXWqGdlRRuog98tzmsPC1T4JbC3yvKX/ZqTpW5W39/chzByGsLAwe1lNQ5Gl0P5e5D/1vHr7bekONJoQFh5W2oSvIb8g33mfvUui9JjpdDAajQiPCFPPI8dY/h3J85UWVdTz6OzNy+ER4c7b+fLvXtXh7PPVxHdEVcr6oytXSHN4Zd8R1SlbWFgIawXdCFUpK/V1HPOioiKUVNA1UJWycnwdXSQWiwXFxcU1UlY+D45u/6qUlXJS3hv5dyGf46qWLSkpUcfCG+nykH/PVS0rfzP52/nSnVKVsr520zjO39nZ2eWev+tky4j8w3D9x1FROV/IF27mUYs6gbfp0rTM4xo1aKQuswrtgYTrl0NlJJvhxaW78fG6Q2pUS3R0FCZc0Q7DuzdDswZnv7x8/TPIFPWy+J5sQoKT7LQCZKXm4fSuI0jffQxZafkosBTDYo6FxWoEDPFAhGyoNvnn53zX8r3p+l3k7Xup4h9QZ8l3hst3sdFgUpvKwzTqoDfooDfpYDDq1WY0uV4aYDQZnPcZjGfL6T1vG3RnL+WxBvd9Ul7dLt0vr+287rx03ewnfqqeuLOfqEpFRPvekhIT630UWnW/I6patirfEVUp6xrw1GRZ1wCtJsvKSVa2mi4rJzdf8xD8VVZOso4TfU2WNRqNzsCkJstKEOXrZ7gqZSXoq8q/jcrUqWCkpmWezEdRXok6uSW2KLt+S3VW7pUEr882HsGMxbuRkWePiK+7MBmTrj0PyXHnEBWUzga7P2s/9u3dgMK16xG++Xc02nECcWdK4DoOKC0uHDtbJSA1PhxZ0eHIiY5AXngY9AiH2RqGSMQgRotDBKJg0ptg0plh0hthVNdNMOrs19WlbHLiNxhVspXZaLInXRn00MmJWX4Fl57Q1cnfoIehNIDQu1y373cEDo7rpfc59pde8kRPRBR
aQjoYcSyO17hNrDpZenIulleawFqZ345m4R9f78DWI/bnbd8oGs9cfz4uaWfvW68Kq82KA9kHsOP0Dudm2b4Tw1cWocd+96bdQhOwo4UOv7c1I/2C5ghr0wZNY5ohJSIRXcMT0DCiIRLCE5xbuMzJQUREVEuEdjDiWI+mdAp4T66L5VUkO78Yzy/ahU83HFF5bNFhRjw4uD1uv6QVTD7OqCqtHutPrMdPx37CztM7sStjl1oTR7Q6qWHMjzb02ls6+kUHZLRKQF739jD16Ymkiy7FtQmtcEtYg3Nb54aIiCgIQjsYKW0ZcSyO5ykuzL5fggKL1QKzl5EAf/9yKxbvsGezD+/WFE8OPQ+NYsN9av3YlLoJCw8uxA+HfijTHdThtBm3rg1Dx22lwZBej9gbrkfSvffC3KJF1d4sERFRLRXSwcgND3ZXs682aVN+MBJjjlGjN2R0gwQKjSLtCa2usvItWLbLPtT1gz/3xoAOSRW+pjzX1lNbsfDAQiw9tBTpBTJ87+yMrwNbDMTFRc3Q+ov1sC37SXLu1QiB2OuuQ+J99yKsdetzft9ERES1SUgHI7GJEWrzRgIR6arJKMxAZmFmucHIkh2pasbMTk1ivAYiEoD8duo31fqx6OAipOanugU8V7a8EkNaDUF3WwoyXvk3cv73spoFVdVx6DVInDABYW3b1sh7JiIiqm1COhjxhXTVSDDyy4lfVCJoYulETw7fbbNPPnPtBclu+4usRVh3Yh2WH16OlUdW4nTh2YlyokxRGJgyEFe3vhp9k/vCZDChcNcuHL7rZljT7S0lMVdeicT770d4xw4BeZ9ERETBwmCkEtIaIqNaXtz4otqaRjVFl8QuamsV0wk/75NZ88wYemGyGnWz+uhqFYCsOb7GmYAqok3R6N+sv2oBubT5pQiTFWRL5a1fj6P3TYDtzBmEdeyIptOmIrxz5yC9YyIiosCqUzOwVjaDmz/sztiN/+z8D7anb1dBievCRULTdDDZknFhchOVC2J1meJZApkrUq5QrSAXNblItYB4yl22DMcemgjNYkFkr15o/tocGAL8HomIiIJ5/mYwUgVnLGfUsNtt6dtUcLLq0GYU69zXNGgf394ZgHRu2LnCobZZX36JE/+YrNZyiR40CM1mvgh9FWY5JCIiqs3q5XTwwRZtjkbv5N5qk1E0vZb+AKs+G1NvagCzOV/tT4lxX9+mPBL/nX7nHZyaaV8ZNG7USCQ/8wx0Pk7vS0REVJ/w7FdNzlE0jZphXJfLfH6cZrMhbcYLyHj/fXW74V13IWniQ5ysjIiIQhaDkWryNoqmIlpxMU489RSyv/5G3W702GNoeOcdfqsjERFRXcBgpBqki+bnvfYhuDKKxhe2ggIce/AhnFm1SpZGRNOp/0LcDTf4uaZERES1H4ORanCd6KxtUtnVfstzYtIkFYjowsPR7JWXEXP55X6vJxERUV3AYCQAXTT5v/6KnO8XqrVlWrzzthrCS0RERHa+LSlL1e6ikZEzaS+8qK7HjRzBQISIiMgDgxE/d9GcWb4cBZs3q+6ZpL/+NSB1JCIiqksYjPixi0YrKUFa6VwiCbffDlPjxn6vHxERUV3DYMSPXTRZX86HZf9+GBo0QMPxfwlADYmIiOoeBiN+6qKx5efj1OxZ6nrifffBEBMToFoSERHVLQxG/NRFk/HBB7CeSocpJQXxN40NQO2IiIjqJgYjfuiiKTl9GqfffkddT3rwAejM5oDUkYiIqC5iMOKHLpr0115X3TThXbog9pprAlZHIiKiuojBSA130VgOHULmvHnqeqNHHoFOz0NMRERUEZ4pa7iLJu3lV4CSEkRd1h9RF/cJUA2JiIjqLgYjNdhFU7B1K3IXLQJ0OjR6+JGA1pGIiKiuYjBSQ100btO+Dx+O8I4dAlY/IiKiuozBSA110ZxZuRL5GzdCFxaGpL9x2nciIiJfMRipgS4a+7TvM9X1hNtuhSnZt9V8iYiIiMFIpZbsTK2
0iyb7q69g2bsPhrg4NLzrrgDWjoiIqO5jMFKJo5n56vKC5nFec0VOz31PXW94zz0wxMYGtH5EREQhGYzMmTMHrVq1Qnh4OPr06YP169d7Lfv222+jf//+iI+PV9vgwYMrLF/bZOZb1GXDqLBy7y/cvl0thqcLD0eDG0cHuHZEREQhGIzMmzcPEydOxJQpU7B582Z07doVQ4YMQVpaWrnlV65ciXHjxmHFihVYu3YtUlJScNVVV+HYsWOo7aTVIzO/WF1vEGkqt0z2V1+ry5jBg2GIrnhmViIiIqqBYOSll17CXXfdhTvvvBOdO3fGG2+8gcjISMydO7fc8h9//DHuu+8+dOvWDZ06dcI777wDm82GZcuWobbLt1hhKbGp6wlRZdeX0SwW5Pzvf+p63A03BLx+REREIReMWCwWbNq0SXW1OJ9Ar1e3pdXDF/n5+SguLkZCQoLXMkVFRcjJyXHbgtlFYzbqEWk2lLn/zI8/wpqVBWNSEqIu6RuEGhIREYVYMJKeng6r1YrGjRu77ZfbJ0+e9Ok5HnvsMTRt2tQtoPE0bdo0xMXFOTfp2gmGzDx7F018pAk6nc5rF03s9cOgM5QNVoiIiKiWjaZ5/vnn8emnn2LBggUq+dWbJ554AtnZ2c7tyJEjCIaM0paR+MiyXTQlmZnIXblSXWcXDRERUfUZq1I4MTERBoMBqan2uTcc5HaTJk0qfOyLL76ogpEffvgBF154YYVlw8LC1FYbZl/1FozkLFwIFBcjrPN5CO/Aqd+JiIgC0jJiNpvRs2dPt+RTRzJq377ecyZmzJiB5557DosWLUKvXr1QV2TkWbwmr2Z/be+iacBWESIiosC1jAgZ1nv77beroKJ379545ZVXkJeXp0bXiNtuuw3NmjVTeR9i+vTpmDx5Mj755BM1N4kjtyQ6OlpttVlmaTASH+U+rLdo/wEUbv0NMBgQe+21QaodERFRiAYjY8eOxalTp1SAIYGFDNmVFg9HUuvhw4fVCBuH119/XY3CGT3afUIwmafk6aefRm3mmGPEs5sm+xt7q0j0pZfCmJgYlLoRERGFbDAi7r//frV5m+TM1cGDB1FXlZfAqtlsyP7mG3U9bsTwoNWNiIiovuDaND4ksLrmjOSv34CS4yegj4lB9BVXBLF2RERE9QODkQpk5JWdCt6RuBp7zTXQ14IRP0RERHUdgxEfElgdLSO2/HzkLl6srscN5ygaIiKimsBgpMJF8txzRnKXLVMBiSklBRHduwe5hkRERPUDgxEvCoqtKCpdJC++tGXEMf27zLha3vTwREREVHUMRiqZ8Mxs0CPKbEBxairyShcDjLvh+iDXjoiIqP5gMOJFVv7Z5FVpBcn59luZbhYRvXrCHKSF+4iIiOojBiM+TAUv+SNZX32lbnNRPCIioprFYMQLR/KqtIwU7twJy9590JnNiL366mBXjYiIqF5hMOLDsF7H3CIxgwfBEBMT5JoRERHVLwxGvMgozRlJCNMj57v/qevsoiEiIqp5DEYqmQq+w6HtsGZkwJCYiKh+/YJdLSIionqHwUglCaytt/ykLuOuuw46Y7XWFSQiIqIKMBipZGhvzHH7qsNR/S8Nco2IiIjqJwYjFbWMaBpMp0+p2+ZmzYJdJSIionqJwUgFQ3tjivOhLyxQt43JycGuEhERUb3EYKSCYKRRfqa6Lsmr+rCwYFeJiIioXmIwUo4CixWFxTY0ys9St01sFSEiIvIbBiPlyCgd1ptcaG8ZMTVtGuQaERER1V8MRiqYfbV5cY66ZDBCRETkPwxGKliXJrkwW12ym4aIiMh/GIyUI7N0jpGk/Ax1aWrGlhEiIiJ/YTBSQTdN/JnSYIQtI0RERH7DYMTLhGdmazEi85gzQkRE5G8MRrwskpdUYB/Wq4uMhD4uLthVIiIiqrcYjJQjI7/YOeGZqWkydDpdsKtERERUbzEY8dYy4ghGktlFQ0RE5E8MRrzkjDQu4IRnREREgcBgxMtomrP
dNAxGiIiI/InBiJd5RhwJrJIzQkRERP7DYKScRfIKiq1sGSEiIgoQBiPlTAWv12xILOBU8ERERIHAYKScYCS+MBcmzQoYDDA2ahTsKhEREdVrDEY8ZOZJvoi9i8bYuBF0RmOwq0RERFSvMRgpp2WkMfNFiIiIAobBSDnBCJNXiYiIAofBSDkTnjmH9XL2VSIiIr9jMOIhy21dGgYjRERE/sZgpJyWkUac8IyIiChgGIx4YM4IERFRYDEY8VCQmYWokkJ1nROeERER+R+DEQ+GtDR1qcXGQR8ZGezqEBER1XsMRjyEZaSqSz1bRYiIiAKCwYiLwmIrYnMy1PUw5osQEREFBIMRL8mrEc2bBbs6REREIYHBiMe6NGeH9bJlhIiIKBAYjLjgsF4iIqLAYzDiEYw4VuzlhGdERESBwWDERVZ2HhIKc9V1towQEREFBoMRF/lHT0APDSVGEwwJCcGuDhERUUhgMOKi5MQJdVkYnwSdThfs6hAREYUEBiOuUu3BSHFi42DXhIiIKGQwGHFhSLdPBY/GDEaIiIgChcGIi/AMezBiSGbyKhERUaAwGHERnXVaXYY3YzBCREQUKAxGXDTItQcj0S2aB7sqREREIYPBSKmComIkls6+2qB1i2BXh4iIKGQwGCmVeSwNZlsJbNChQQq7aYiIiAKFwUip7IOH1WVWZCwM4WHBrg4REVHIqFYwMmfOHLRq1Qrh4eHo06cP1q9f77Xsjh07MGrUKFVeJhJ75ZVXUBudOXxUXWbFNAx2VYiIiEJKlYORefPmYeLEiZgyZQo2b96Mrl27YsiQIUhLK52jw0N+fj7atGmD559/Hk2aNEFtVXTsuLrMi0sMdlWIiIhCSpWDkZdeegl33XUX7rzzTnTu3BlvvPEGIiMjMXfu3HLLX3TRRXjhhRdw0003ISys9nZ/lJwsnQo+ISnYVSEiIgopVQpGLBYLNm3ahMGDB599Ar1e3V67dm2NVaqoqAg5OTlum7/pUk+qS2sSZ18lIiKqtcFIeno6rFYrGntMly63T560n8xrwrRp0xAXF+fcUlJS4G+m9FR1qavFXUlERET1Ua0cTfPEE08gOzvbuR05csTvrxmRma4ujZwKnoiIKKCMVSmcmJgIg8GA1FR7K4KD3K7J5FTJLQlkfoktLw/hBWfU9YjmzQL2ukRERFTFlhGz2YyePXti2bJlzn02m03d7tu3L+qq4hP25NUzxnA0SIoPdnWIiIhCSpVaRoQM67399tvRq1cv9O7dW80bkpeXp0bXiNtuuw3NmjVTeR+OpNedO3c6rx87dgxbtmxBdHQ02rVrh9qg+Lh9WG9aZDyaRJqDXR0iIqKQUuVgZOzYsTh16hQmT56skla7deuGRYsWOZNaDx8+rEbYOBw/fhzdu3d33n7xxRfVNmDAAKxcuRK1QfFxe8vIqYgG6BzFYISIiKhWByPi/vvvV1t5PAMMmXlV0zTUZoVH7bOvpkbGIz7SFOzqEBERhZRaOZom0PKPHFOX6ZHxiA1nMEJERBRIDEYkl6U0ZySvQRL0el2wq0NERBRSGIzIiKDSqeCLGnIqeCIiokAL+WBEKymBLt0+4ZnWiFPBExERBVrIByMlqanQaTYU6wwwJrFlhIiIKNBCPhhxTHiWHtEA8VHhwa4OERFRyGEwUpq8mhopwQjnGCEiIgo0BiOlwcipCM4xQkREFAwMRkpnX5Wp4NkyQkREFHgMRlzWpUngujREREQBx2CkNIE1TSWwspuGiIgo0EI6GJE1c1xbRuLZMkJERBRwIR2MWLOyoBUUOFfsZTBCREQUeCEdjDhaRTLCYmA1mhAbwW4aIiKiQGMwUtoq0iDSDAMXySMiIgq4kA5GShzJq5HxaMA5RoiIiIIipIOR4mMc1ktERBRsoR2MuA3rZTBCREQUDKEdjLgN62U3DRERUTCEdDCScPvt2DVoFA7EJrNlhIiIKEiMCGFxw67DT/nNkfrrMc4xQkREFCQh3TIiMvI
t6pIJrERERMER8sFIZn6xumQ3DRERUXAwGMmzt4wwgZWIiCg4GIw4ghG2jBAREQVFSAcjxVYbcotK1HXmjBAREQVHSAcjmaXJqzoduEgeERFRkIR0MJJVmrzaIMLERfKIiIiCJKSDkQxn8iq7aIiIiIIlpIMRJq8SEREFX2gHI445RtgyQkREFDQhHoxwjhEiIqJgC+1gpLSbJoHdNEREREET0sGIY10a5owQEREFT0gHI5wKnoiIKPhCOxhhAisREVHQGRHCxl6Ugj6tE9CuUXSwq0JERBSyQjoYGde7RbCrQEREFPJCupuGiIiIgo/BCBEREQUVgxEiIiIKKgYjREREFFQMRoiIiCioGIwQERFRUDEYISIioqBiMEJERERBxWCEiIiIgorBCBEREQUVgxEiIiIKKgYjREREFFQMRoiIiCio6sSqvZqmqcucnJxgV4WIiIh85DhvO87jdToYyc3NVZcpKSnBrgoRERFV4zweFxfn9X6dVlm4UgvYbDYcP34cMTEx0Ol0NRqxSYBz5MgRxMbG1tjzkjse58DhsQ4MHufA4HGu+8dZQgwJRJo2bQq9Xl+3W0bkDTRv3txvzy8Hnx90/+NxDhwe68DgcQ4MHue6fZwrahFxYAIrERERBRWDESIiIgqqkA5GwsLCMGXKFHVJ/sPjHDg81oHB4xwYPM6hc5zrRAIrERER1V8h3TJCREREwcdghIiIiIKKwQgREREFFYMRIiIiCioGI0RERBRUIR2MzJkzB61atUJ4eDj69OmD9evXB7tKddrq1asxbNgwNe2vTNv/1Vdfud0vA7cmT56M5ORkREREYPDgwfjjjz+CVt+6atq0abjooovU8giNGjXC8OHDsXv3brcyhYWFmDBhAho2bIjo6GiMGjUKqampQatzXfT666/jwgsvdM5K2bdvXyxcuNB5P4+xfzz//PPq++PBBx907uOxPndPP/20Oq6uW6dOnWrNMQ7ZYGTevHmYOHGiGlu9efNmdO3aFUOGDEFaWlqwq1Zn5eXlqeMoQV55ZsyYgX//+9944403sG7dOkRFRaljLv8IyHerVq1SXxq//PILli5diuLiYlx11VXq+Ds89NBD+Pbbb/H555+r8rK208iRI4Na77pGlqCQE+OmTZuwceNGDBw4EDfccAN27Nih7ucxrnkbNmzAm2++qYJAVzzWNeP888/HiRMnnNtPP/1Ue46xFqJ69+6tTZgwwXnbarVqTZs21aZNmxbUetUX8tFasGCB87bNZtOaNGmivfDCC859WVlZWlhYmPbf//43SLWsH9LS0tTxXrVqlfO4mkwm7fPPP3eW2bVrlyqzdu3aINa07ouPj9feeecdHmM/yM3N1dq3b68tXbpUGzBggPbAAw+o/TzWNWPKlCla165dy72vNhzjkGwZsVgs6teOdBO4LsYnt9euXRvUutVXBw4cwMmTJ92OuSyeJN1jPObnJjs7W10mJCSoS/lsS2uJ67GW5tgWLVrwWFeT1WrFp59+qlqfpLuGx7jmSWvftdde63ZMBY91zZFucelGb9OmDW655RYcPny41hzjOrFqb01LT09XXy6NGzd22y+3f//996DVqz6TQESUd8wd91HV2Ww21bfer18/dOnSRe2T42k2m9GgQQO3sjzWVbdt2zYVfEhXovSjL1iwAJ07d8aWLVt4jGuQBHrSXS7dNJ74ea4Z8sPv/fffR8eOHVUXzTPPPIP+/ftj+/btteIYh2QwQlSffk3Kl4lr3y/VHPnilsBDWp+++OIL3H777ao/nWrOkSNH8MADD6j8JxlMQP5xzTXXOK9LTo4EJy1btsRnn32mBhQEW0h20yQmJsJgMJTJFJbbTZo0CVq96jPHceUxrzn3338/vvvuO6xYsUIlWzrI8ZSuyKysLLfyPNZVJ78W27Vrh549e6pRTJKg/eqrr/IY1yDpIpCBAz169IDRaFSbBHyS7C7X5dc5j3XNk1aQDh06YO/evbXi86wP1S8
Y+XJZtmyZW3O33JYmWap5rVu3Vh9q12Oek5OjRtXwmFeN5AdLICJdBsuXL1fH1pV8tk0mk9uxlqG/0j/MY31u5HuiqKiIx7gGDRo0SHWHSQuUY+vVq5fKaXBc57GueWfOnMG+ffvUVAu14vOshahPP/1UjeR4//33tZ07d2r/93//pzVo0EA7efJksKtWp7Phf/31V7XJR+ull15S1w8dOqTuf/7559Ux/vrrr7XffvtNu+GGG7TWrVtrBQUFwa56nXLvvfdqcXFx2sqVK7UTJ044t/z8fGeZe+65R2vRooW2fPlybePGjVrfvn3VRr57/PHH1QilAwcOqM+r3NbpdNqSJUvU/TzG/uM6mkbwWJ+7hx9+WH1nyOd5zZo12uDBg7XExEQ1Gq82HOOQDUbErFmz1ME3m81qqO8vv/wS7CrVaStWrFBBiOd2++23O4f3/uMf/9AaN26sAsFBgwZpu3fvDna165zyjrFs7733nrOMBHj33XefGooaGRmpjRgxQgUs5Ls///nPWsuWLdX3Q1JSkvq8OgIRwWMcuGCEx/rcjR07VktOTlaf52bNmqnbe/furTXHWCf/C0wbDBEREVFZIZkzQkRERLUHgxEiIiIKKgYjREREFFQMRoiIiCioGIwQERFRUDEYISIioqBiMEJERERBxWCEiIiIgorBCBEREQUVgxEiIiIKKgYjREREhGD6fy+ksimvvcS/AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# plot results\n", + "plt.figure()\n", + "for i in range(num_clients):\n", + " plt.plot(history[f\"site-{i+1}\"], label=f\"site-{i+1}\")\n", + "plt.axhline(y=consensus_value, color='k', linestyle=\"--\", label=\"Consensus value\")\n", + "plt.legend()\n", + "plt.title(\"Evolution of local values\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/advanced/distributed_optimization/2-two_moons/README.md b/examples/advanced/distributed_optimization/2-two_moons/README.md new file mode 100644 index 0000000000..8d26022b39 --- /dev/null +++ b/examples/advanced/distributed_optimization/2-two_moons/README.md @@ -0,0 +1,28 @@ +# Distributed classification - two moons dataset + +In this example we consider the simple [two moons](https://scikit-learn.org/dev/modules/generated/sklearn.datasets.make_moons.html) classification problem and compare different distributed optimization algorithms: +- Distributed gradient descent +- Gradient tracking +- GTAdam + +We run all the algorithms with 6 clients, for 1000 iterations and with a stepsize of 0.01. These common parameters can be changed in the `config.py` file. + +The models and datasets are stored in `utils.py` and are the same for all algorithms. 
+ +## Distributed gradient descent +``` +python launcher_dgd.py +``` +![dgd](dgd_results.png) + +## Gradient tracking +``` +python launcher_gt.py +``` +![gt](gt_results.png) + +## GTAdam +``` +python launcher_gtadam.py +``` +![gtadam](gtadam_results.png) \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/2-two_moons/config.py b/examples/advanced/distributed_optimization/2-two_moons/config.py new file mode 100644 index 0000000000..fbd18354fb --- /dev/null +++ b/examples/advanced/distributed_optimization/2-two_moons/config.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+NUM_CLIENTS = 6 +ITERATIONS = 1000 +STEPSIZE = 0.01 \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/2-two_moons/dgd_results.png b/examples/advanced/distributed_optimization/2-two_moons/dgd_results.png new file mode 100644 index 0000000000..420bf7c6ca Binary files /dev/null and b/examples/advanced/distributed_optimization/2-two_moons/dgd_results.png differ diff --git a/examples/advanced/distributed_optimization/2-two_moons/gt_results.png b/examples/advanced/distributed_optimization/2-two_moons/gt_results.png new file mode 100644 index 0000000000..86f0284f86 Binary files /dev/null and b/examples/advanced/distributed_optimization/2-two_moons/gt_results.png differ diff --git a/examples/advanced/distributed_optimization/2-two_moons/gtadam_results.png b/examples/advanced/distributed_optimization/2-two_moons/gtadam_results.png new file mode 100644 index 0000000000..869524de07 Binary files /dev/null and b/examples/advanced/distributed_optimization/2-two_moons/gtadam_results.png differ diff --git a/examples/advanced/distributed_optimization/2-two_moons/launcher_dgd.py b/examples/advanced/distributed_optimization/2-two_moons/launcher_dgd.py new file mode 100644 index 0000000000..49e1d99211 --- /dev/null +++ b/examples/advanced/distributed_optimization/2-two_moons/launcher_dgd.py @@ -0,0 +1,63 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +from config import ITERATIONS, NUM_CLIENTS, STEPSIZE +from utils import NeuralNetwork, get_dataloaders, plot_results + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import DGDExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network +from nvflare.job_config.api import FedJob + + +class CustomDGDExecutor(DGDExecutor): + def __init__(self, data_seed: int | None = None): + self._data_seed = data_seed + train_dataloader, test_dataloader = get_dataloaders(data_seed) + super().__init__( + model=NeuralNetwork(), + loss=torch.nn.CrossEntropyLoss(), + train_dataloader=train_dataloader, + test_dataloader=test_dataloader, + ) + + +if __name__ == "__main__": + # Create job + job_name = "dgd" + job = FedJob(name=job_name) + + # generate random config + network, _ = generate_random_network(num_clients=NUM_CLIENTS) + config = Config( + network=network, + extra={"iterations": ITERATIONS, "stepsize": STEPSIZE}, + ) + + # send controller to server + controller = DistOptController(config=config) + job.to_server(controller) + + # Add clients + for i in range(NUM_CLIENTS): + executor = CustomDGDExecutor(data_seed=i) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run(f"./tmp/runs/{job_name}") + + # plot and save results + plot_results(job_name, NUM_CLIENTS) diff --git a/examples/advanced/distributed_optimization/2-two_moons/launcher_gt.py b/examples/advanced/distributed_optimization/2-two_moons/launcher_gt.py new file mode 100644 index 0000000000..19b93d6e9e --- /dev/null +++ b/examples/advanced/distributed_optimization/2-two_moons/launcher_gt.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from config import ITERATIONS, NUM_CLIENTS, STEPSIZE +from utils import NeuralNetwork, get_dataloaders, plot_results + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import GTExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network +from nvflare.job_config.api import FedJob + + +class CustomGTExecutor(GTExecutor): + def __init__(self, data_seed: int | None = None): + self._data_seed = data_seed + train_dataloader, test_dataloader = get_dataloaders(data_seed) + super().__init__( + model=NeuralNetwork(), + loss=torch.nn.CrossEntropyLoss(), + train_dataloader=train_dataloader, + test_dataloader=test_dataloader, + ) + + +if __name__ == "__main__": + # Create job + job_name = "gt" + job = FedJob(name=job_name) + + # generate random config + network, _ = generate_random_network(num_clients=NUM_CLIENTS) + config = Config( + network=network, extra={"iterations": ITERATIONS, "stepsize": STEPSIZE} + ) + + # send controller to server + controller = DistOptController(config=config) + job.to_server(controller) + + # Add clients + for i in range(NUM_CLIENTS): + executor = CustomGTExecutor(data_seed=i) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run(f"./tmp/runs/{job_name}") + + plot_results(job_name, NUM_CLIENTS) diff --git a/examples/advanced/distributed_optimization/2-two_moons/launcher_gtadam.py b/examples/advanced/distributed_optimization/2-two_moons/launcher_gtadam.py 
new file mode 100644 index 0000000000..bbd4821498 --- /dev/null +++ b/examples/advanced/distributed_optimization/2-two_moons/launcher_gtadam.py @@ -0,0 +1,67 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from config import ITERATIONS, NUM_CLIENTS, STEPSIZE +from utils import NeuralNetwork, get_dataloaders, plot_results + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import GTADAMExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network +from nvflare.job_config.api import FedJob + + +class CustomGTADAMExecutor(GTADAMExecutor): + def __init__(self, data_seed: int | None = None): + self._data_seed = data_seed + train_dataloader, test_dataloader = get_dataloaders(data_seed) + super().__init__( + model=NeuralNetwork(), + loss=torch.nn.CrossEntropyLoss(), + train_dataloader=train_dataloader, + test_dataloader=test_dataloader, + ) + +if __name__ == "__main__": + # Create job + job_name = "gtadam" + job = FedJob(name=job_name) + + # generate random config + network, _ = generate_random_network(num_clients=NUM_CLIENTS) + config = Config( + network=network, + extra={ + "iterations": ITERATIONS, + "stepsize": STEPSIZE, + "beta1": 0.9, + "beta2": 0.999, + "epsilon": 1e-8, + }, + ) + + # send controller to server + controller = DistOptController(config=config) + 
job.to_server(controller) + + # Add clients + for i in range(NUM_CLIENTS): + executor = CustomGTADAMExecutor(data_seed=i) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run(f"./tmp/runs/{job_name}") + + plot_results(job_name, NUM_CLIENTS) diff --git a/examples/advanced/distributed_optimization/2-two_moons/utils.py b/examples/advanced/distributed_optimization/2-two_moons/utils.py new file mode 100644 index 0000000000..4c56b57b4f --- /dev/null +++ b/examples/advanced/distributed_optimization/2-two_moons/utils.py @@ -0,0 +1,104 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from torch import nn +from sklearn.datasets import make_moons +import torch +from torch.utils.data import Dataset, DataLoader +import matplotlib.pyplot as plt + + +class TwoMoonsDataset(Dataset): + def __init__(self, X, y): + self.X = X + self.y = y + + def __len__(self): + return len(self.X) + + def __getitem__(self, idx): + return self.X[idx], self.y[idx] + + +def get_dataloaders(data_seed:int): + X, y = make_moons(n_samples=128, noise=0.1, random_state=data_seed) + + X_train = torch.from_numpy(X).float() + y_train = torch.from_numpy(y).long() + + X, y = make_moons(n_samples=20, noise=0.1, random_state=42) + + X_test = torch.from_numpy(X).float() + y_test = torch.from_numpy(y).long() + + train_dataset = TwoMoonsDataset(X_train, y_train) + test_dataset = TwoMoonsDataset(X_test, y_test) + + train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True) + test_dataloader = DataLoader(test_dataset, batch_size=20) + return train_dataloader, test_dataloader + + +class NeuralNetwork(nn.Module): + def __init__(self): + super(NeuralNetwork, self).__init__() + self.fc1 = nn.Linear(2, 256) + self.fc2 = nn.Linear(256, 256) + self.fc3 = nn.Linear(256, 2) + + def forward(self, x): + x = torch.relu(self.fc1(x)) + x = torch.relu(self.fc2(x)) + x = self.fc3(x) + return x + + +def plot_results(job, num_clients): + plt.style.use('ggplot') + train_loss = {} + test_loss = {} + for i in range(num_clients): + train_loss[f"site-{i + 1}"] = torch.load( + f"./tmp/runs/{job}/site-{i + 1}/train_loss_sequence.pt" + ) + test_loss[f"site-{i + 1}"] = torch.load( + f"./tmp/runs/{job}/site-{i + 1}/test_loss_sequence.pt" + ) + + fig, axs = plt.subplots(1, 2, figsize=(12, 6)) + + # First subplot: Evolution of training loss + for i in range(num_clients): + time = train_loss[f"site-{i + 1}"][:, 0] + loss = train_loss[f"site-{i + 1}"][:, 1] + axs[0].plot(range(len(loss)), loss, label=f"site-{i + 1}") + axs[0].legend() + axs[0].set_ylim(-0.1, 1) + axs[0].set_ylabel("Loss") + 
axs[0].set_xlabel("Iteration") + axs[0].set_title("Evolution of Training Loss") + + # Second subplot: Evolution of test loss + for i in range(num_clients): + time = test_loss[f"site-{i + 1}"][:, 0] + loss = test_loss[f"site-{i + 1}"][:, 1] + axs[1].plot(range(len(loss)), loss, label=f"site-{i + 1}") + axs[1].legend() + axs[1].set_ylim(-0.1, 1) + axs[1].set_ylabel("Loss") + axs[1].set_xlabel("Iteration") + axs[1].set_title("Evolution of Test Loss") + + plt.tight_layout() + plt.savefig(f"{job}_results.png") + # plt.show() \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/3-mnist/README.md b/examples/advanced/distributed_optimization/3-mnist/README.md new file mode 100644 index 0000000000..97f24e8a8c --- /dev/null +++ b/examples/advanced/distributed_optimization/3-mnist/README.md @@ -0,0 +1,32 @@ +# Distributed classification - MNIST dataset + +In this example we consider a distributed classification problem over the MNIST dataset and compare different distributed optimization algorithms: +- Distributed gradient descent +- Gradient tracking +- GTAdam + +The learning scenario we consider here is a more challenging one than in the previous `two_moons` example. +Each client has training samples from a (non-overlapping) subset of the labels from the dataset. +This means that if we have 10 clients, each of them will have in their own training dataset only images of a single digit. This means that none of the clients would be able to learn anything meaningful on its own, but by using a distributed algorithm, all of them will learn to correctly classify all the digits. + +We run all the algorithms with 4 clients, for 10,000 iterations and with a stepsize of 0.001. These common parameters can be changed in the `config.py` file. + +The models and datasets are stored in `utils.py` and are the same for all algorithms. 
+ +## Distributed gradient descent +``` +python launcher_dgd.py +``` +![dgd](dgd_results.png) + +## Gradient tracking +``` +python launcher_gt.py +``` +![gt](gt_results.png) + +## GTAdam +``` +python launcher_gtadam.py +``` +![gtadam](gtadam_results.png) \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/3-mnist/config.py b/examples/advanced/distributed_optimization/3-mnist/config.py new file mode 100644 index 0000000000..8ce3abb37d --- /dev/null +++ b/examples/advanced/distributed_optimization/3-mnist/config.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+NUM_CLIENTS = 4 +ITERATIONS = 20000 +STEPSIZE = 0.001 \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/3-mnist/dgd_results.png b/examples/advanced/distributed_optimization/3-mnist/dgd_results.png new file mode 100644 index 0000000000..cd80de2526 Binary files /dev/null and b/examples/advanced/distributed_optimization/3-mnist/dgd_results.png differ diff --git a/examples/advanced/distributed_optimization/3-mnist/gt_results.png b/examples/advanced/distributed_optimization/3-mnist/gt_results.png new file mode 100644 index 0000000000..4d5c6d84bf Binary files /dev/null and b/examples/advanced/distributed_optimization/3-mnist/gt_results.png differ diff --git a/examples/advanced/distributed_optimization/3-mnist/gtadam_results.png b/examples/advanced/distributed_optimization/3-mnist/gtadam_results.png new file mode 100644 index 0000000000..577993f3d7 Binary files /dev/null and b/examples/advanced/distributed_optimization/3-mnist/gtadam_results.png differ diff --git a/examples/advanced/distributed_optimization/3-mnist/launcher_dgd.py b/examples/advanced/distributed_optimization/3-mnist/launcher_dgd.py new file mode 100644 index 0000000000..59fdfd1c9c --- /dev/null +++ b/examples/advanced/distributed_optimization/3-mnist/launcher_dgd.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +from config import ITERATIONS, NUM_CLIENTS, STEPSIZE +from utils import NeuralNetwork, get_dataloaders, plot_results + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import DGDExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network +from nvflare.job_config.api import FedJob + + +class CustomDGDExecutor(DGDExecutor): + def __init__(self, data_chunk: int | None = None): + self._data_chunk = data_chunk + train_dataloader, test_dataloader = get_dataloaders(data_chunk) + super().__init__( + model=NeuralNetwork(), + loss=torch.nn.CrossEntropyLoss(), + train_dataloader=train_dataloader, + test_dataloader=test_dataloader, + ) + + +if __name__ == "__main__": + # Create job + job_name = "dgd" + job = FedJob(name=job_name) + + # generate random config + network, _ = generate_random_network(num_clients=NUM_CLIENTS) + config = Config( + network=network, extra={"iterations": ITERATIONS, "stepsize": STEPSIZE} + ) + + # send controller to server + controller = DistOptController(config=config) + job.to_server(controller) + + # Add clients + for i in range(NUM_CLIENTS): + executor = CustomDGDExecutor(data_chunk=i) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run(f"./tmp/runs/{job_name}") + + # plot and save results + plot_results(job_name, NUM_CLIENTS) diff --git a/examples/advanced/distributed_optimization/3-mnist/launcher_gt.py b/examples/advanced/distributed_optimization/3-mnist/launcher_gt.py new file mode 100644 index 0000000000..593f617578 --- /dev/null +++ b/examples/advanced/distributed_optimization/3-mnist/launcher_gt.py @@ -0,0 +1,58 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from config import ITERATIONS, NUM_CLIENTS, STEPSIZE +from utils import NeuralNetwork, get_dataloaders, plot_results + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import GTExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network +from nvflare.job_config.api import FedJob + + +class CustomGTExecutor(GTExecutor): + def __init__(self, data_chunk: int | None = None): + self._data_chunk = data_chunk + train_dataloader, test_dataloader = get_dataloaders(data_chunk) + super().__init__( + model=NeuralNetwork(), + loss=torch.nn.CrossEntropyLoss(), + train_dataloader=train_dataloader, + test_dataloader=test_dataloader, + ) + +if __name__ == "__main__": + # Create job + job_name = "gt" + job = FedJob(name=job_name) + + # generate random config + network, _ = generate_random_network(num_clients=NUM_CLIENTS) + config = Config(network=network, extra={"iterations": ITERATIONS, "stepsize": STEPSIZE}) + + # send controller to server + controller = DistOptController(config=config) + job.to_server(controller) + + # Add clients + for i in range(NUM_CLIENTS): + executor = CustomGTExecutor(data_chunk=i) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run(f"./tmp/runs/{job_name}") + + plot_results(job_name, NUM_CLIENTS) diff --git a/examples/advanced/distributed_optimization/3-mnist/launcher_gtadam.py b/examples/advanced/distributed_optimization/3-mnist/launcher_gtadam.py new file 
mode 100644 index 0000000000..5824d80af2 --- /dev/null +++ b/examples/advanced/distributed_optimization/3-mnist/launcher_gtadam.py @@ -0,0 +1,67 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from config import ITERATIONS, NUM_CLIENTS, STEPSIZE +from utils import NeuralNetwork, get_dataloaders, plot_results + +from nvflare.app_opt.p2p.controllers import DistOptController +from nvflare.app_opt.p2p.executors import GTADAMExecutor +from nvflare.app_opt.p2p.types import Config +from nvflare.app_opt.p2p.utils.config_generator import generate_random_network +from nvflare.job_config.api import FedJob + + +class CustomGTADAMExecutor(GTADAMExecutor): + def __init__(self, data_chunk: int | None = None): + self._data_chunk = data_chunk + train_dataloader, test_dataloader = get_dataloaders(data_chunk) + super().__init__( + model=NeuralNetwork(), + loss=torch.nn.CrossEntropyLoss(), + train_dataloader=train_dataloader, + test_dataloader=test_dataloader, + ) + +if __name__ == "__main__": + # Create job + job_name = "gtadam" + job = FedJob(name=job_name) + + # generate random config + network, _ = generate_random_network(num_clients=NUM_CLIENTS) + config = Config( + network=network, + extra={ + "iterations": ITERATIONS, + "stepsize": STEPSIZE, + "beta1": 0.9, + "beta2": 0.999, + "epsilon": 1e-8, + }, + ) + + # send controller to server + controller = DistOptController(config=config) + 
job.to_server(controller) + + # Add clients + for i in range(NUM_CLIENTS): + executor = CustomGTADAMExecutor(data_chunk=i) + job.to(executor, f"site-{i + 1}") + + # run + job.export_job("./tmp/job_configs") + job.simulator_run(f"./tmp/runs/{job_name}") + + plot_results(job_name, NUM_CLIENTS) diff --git a/examples/advanced/distributed_optimization/3-mnist/utils.py b/examples/advanced/distributed_optimization/3-mnist/utils.py new file mode 100644 index 0000000000..15716d41d2 --- /dev/null +++ b/examples/advanced/distributed_optimization/3-mnist/utils.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import matplotlib.pyplot as plt +import torch +from torch import nn +from torchvision import datasets +from torchvision.transforms import ToTensor +from config import NUM_CLIENTS + + +def get_dataloaders(data_chunk): + training_data = datasets.MNIST( + root="data", train=True, download=True, transform=ToTensor() + ) + test_data = datasets.MNIST( + root="data", train=False, download=True, transform=ToTensor() + ) + + # split dataset so that each agent has a subset with (distinct) labels + labels = [ + (training_data.targets == i).nonzero(as_tuple=True)[0].tolist() + for i in range(10) + ] + indices = torch.tensor_split(torch.arange(10), NUM_CLIENTS)[data_chunk] + local_labels = [] + for i in indices: + local_labels += labels[i] + + train_dataloader = torch.utils.data.DataLoader( + torch.utils.data.Subset(training_data, local_labels), + batch_size=128, + ) + + test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=128) + return train_dataloader, test_dataloader + + +class NeuralNetwork(nn.Module): + def __init__(self): + super().__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28 * 28, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, 10), + ) + + def forward(self, x): + x = self.flatten(x) + logits = self.linear_relu_stack(x) + return logits + + +def plot_results(job, num_clients): + plt.style.use('ggplot') + train_loss = {} + test_loss = {} + for i in range(num_clients): + train_loss[f"site-{i + 1}"] = torch.load( + f"./tmp/runs/{job}/site-{i + 1}/train_loss_sequence.pt" + ) + test_loss[f"site-{i + 1}"] = torch.load( + f"./tmp/runs/{job}/site-{i + 1}/test_loss_sequence.pt" + ) + + fig, axs = plt.subplots(1, 2, figsize=(12, 6)) + + # First subplot: Evolution of training loss + for i in range(num_clients): + time = train_loss[f"site-{i + 1}"][:, 0] + loss = train_loss[f"site-{i + 1}"][:, 1] + axs[0].plot(time, loss, label=f"site-{i + 1}") + axs[0].legend() + axs[0].set_ylim(-0.1, 3) + 
axs[0].set_ylabel("Loss") + axs[0].set_xlabel("Time (s)") + axs[0].set_title("Evolution of Training Loss") + + # Second subplot: Evolution of test loss + for i in range(num_clients): + time = test_loss[f"site-{i + 1}"][:, 0] + loss = test_loss[f"site-{i + 1}"][:, 1] + axs[1].plot(time, loss, label=f"site-{i + 1}") + axs[1].legend() + axs[1].set_ylim(-0.1, 3) + axs[1].set_ylabel("Loss") + axs[1].set_xlabel("Time (s)") + axs[1].set_title("Evolution of Test Loss") + + plt.tight_layout() + # plt.savefig(f"{job}_results.png") + plt.show() \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/README.md b/examples/advanced/distributed_optimization/README.md new file mode 100644 index 0000000000..6b6aad26f7 --- /dev/null +++ b/examples/advanced/distributed_optimization/README.md @@ -0,0 +1,457 @@ +# P2P Distributed Optimization algorithms with NVFlare + +In this example we show how to exploit the lower-level NVFlare APIs to implement and run P2P distributed optimization algorithms. The aim here is twofold: on one hand we provide a few [examples](#examples) showing how to directly use the `nvflare.app_opt.p2p` API to run distributed optimization algorithms, on the other hand we provide a [walkthrough](#implementation-walkthrough) of the actual implementation of the APIs in `nvflare.app_opt.p2p` to show how to exploit lower-level NVFlare APIs for advanced use-cases. + + +## Examples +The following algorithms are currently implemented in `nvflare.app_opt.p2p`: +- Consensus algorithm - initially published in [DeGroot, M. H. (1974). Reaching a Consensus. Journal of the American Statistical Association, 69(345), 118–121.](https://doi.org/10.2307/2285509) +- Distributed (stochastic) gradient descent [Tsitsiklis, J., Bertsekas, D., & Athans, M. (1986). Distributed asynchronous deterministic and stochastic gradient optimization algorithms. 
IEEE transactions on automatic control, 31(9), 803-812.](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=1104412) and [Sundhar Ram, S., Nedić, A., & Veeravalli, V. V. (2010). Distributed stochastic subgradient projection algorithms for convex optimization. Journal of optimization theory and applications, 147, 516-545.](https://arxiv.org/pdf/0811.2595) +- (Stochastic) gradient tracking [Pu, S., & Nedić, A. (2021). Distributed stochastic gradient tracking methods. Mathematical Programming, 187(1), 409-457.](https://arxiv.org/pdf/1805.11454) +- GTAdam [Carnevale, G., Farina, F., Notarnicola, I., & Notarstefano, G. (2022). GTAdam: Gradient tracking with adaptive momentum for distributed online optimization. IEEE Transactions on Control of Network Systems, 10(3), 1436-1448.](https://ieeexplore.ieee.org/abstract/document/9999485) + + +In this repo we provide the following examples: +- [1-consensus](./1-consensus/) - a simple consensus algorithm to compute the average of a set of numbers +- [2-two_moons](./2-two_moons/) - different distributed optimization algorithms solving the two moons classification problem +- [3-mnist](./3-mnist/) - different distributed optimization algorithms training local models to classify MNIST images in a heavily unbalanced setting + + +## Implementation walkthrough +Let's now walk through how to use NVFlare to implement custom peer-to-peer (P2P) algorithms, opening the road to easily implement custom distributed optimization and swarm learning workflows. +Specifically, we'll delve into using some lower-level NVFlare APIs to create a controllers and executors, which serve as the backbone for orchestrating communication and computation across different nodes (clients) in a distributed setup. +As an example, we'll demonstrate how to implement a consensus algorithm using these components and we'll show it in action in the next notebook. 
+ +As said, the final implementation is in the `nvflare.app_opt.p2p` module - we'll refer to the specific files along the notebook. + +### Introduction + +In peer-to-peer (P2P) algorithms, being distributed optimization or decentralized federated learning algorithms, clients communicate directly with each other without relying on a central server to aggregate updates. Implementing P2P algorithms usually requires careful orchestration to handle communication, synchronization, and data exchange among clients. + +Thankfully, NVFlare natively provides the primitives to easily build such a system. + +#### Exploiting NVFlare low-level APIs +NVFlare natively supports various communication and orchestration patterns, including peer-to-peer interactions, which are crucial for decentralized algorithms. + +To implement custom P2P/distributed optimization algorithms, we'll delve into its lower level APIs to build a framework facilitate building P2P algorithms. In particular, we'll use + +- [Controllers](https://nvflare.readthedocs.io/en/2.5/apidocs/nvflare.apis.impl.controller.html#module-nvflare.apis.impl.controller): Server-side components that manage job execution and orchestrate tasks. +- [Executors](https://nvflare.readthedocs.io/en/2.5/apidocs/nvflare.apis.executor.html#module-nvflare.apis.executor): Client-side components that perform computations and handle tasks received from the controller. +- [Messages via aux channes](https://nvflare.readthedocs.io/en/2.5/apidocs/nvflare.private.aux_runner.html#nvflare.private.aux_runner.AuxRunner.send_aux_request): Custom messages that enable direct communication between clients. + +#### What to expect +We'll start by defining a way to easily define and share configurations across the network. Then we'll implement a base controller and executor, serving as the backbone to implement arbitrary p2p algorithms. Finally we'll build upon the base executor to implement a specific algorithm, the Consensus algorithm. 
+
+### Pythonic configs
+
+Before we dive into the controller and executor, let's start by creating a Pythonic way to define the configuration of our network. We'll start by simply defining a `Node` (i.e. a client in the network), its `Neighbors` (i.e. other clients with which a client communicates, each with a weight) and combine them to define a `Network` (i.e. a network of clients with neighbors).
+
+```python
+from dataclasses import dataclass, field
+
+@dataclass
+class Neighbor:
+    id: int | str
+    weight: float | None = None
+
+@dataclass
+class Node:
+    id: int | str
+    neighbors: list[Neighbor] = field(default_factory=list)
+
+@dataclass
+class Network:
+    nodes: list[Node] = field(default_factory=list)
+```
+
+Then we'll define global and local config objects to be passed to the controller and executors respectively.
+
+```python
+@dataclass
+class Config:
+    network: Network
+    extra: dict = field(default_factory=dict)
+
+@dataclass
+class LocalConfig:
+    neighbors: list[Neighbor]
+    extra: dict = field(default_factory=dict)
+```
+
+The `extra` parameter can be used to pass additional parameters, usually specific for the various algorithms.
+
+The actual implementation of the objects above can be found in `nvflare/app_opt/p2p/types/__init__.py` (you'll see they'll have the `__dict__` and `__post_init__` methods defined to facilitate serializing and deserializing them, which is needed for NVFlare).
+
+Here's an example of a ring network with 3 clients, running an algorithm for 100 iterations:
+```python
+Config(
+    extra={"iterations":100},
+    network=Network(
+        nodes=[
+            Node(
+                id='site-1',
+                neighbors=[
+                    Neighbor(id='site-2', weight=0.1),
+                ]
+            ),
+            Node(
+                id='site-2',
+                neighbors=[
+                    Neighbor(id='site-3', weight=0.1),
+                ]
+            ),
+            Node(
+                id='site-3',
+                neighbors=[
+                    Neighbor(id='site-1', weight=0.1),
+                ]
+            ),
+        ]
+    )
+)
+```
+
+### The controller
+
+In NVFlare, a `Controller` is a server-side component that manages the job execution and orchestration of tasks. 
Here, since we're running a P2P algorithm, we'll implement a custom controller whose main job is to load and broadcast the network configuration, and initiate/terminate the execution of a P2P distributed optimization algorithm. Let's call it `DistOptController`. As a subclass of `Controller`, it must implement 3 methods: + +- `start_controller` which is called at the beginning of the run +- `control_flow` defining the main control flow of the controller (in this case, broadcasting the configuration and asking clients to run the algorithm) +- `stop_controller`, called at the end of the run + +```python +from nvflare.apis.impl.controller import Controller + +class DistOptController(Controller): + + def control_flow(self, abort_signal: Signal, fl_ctx: FLContext): + # Broadcast configuration to clients + ... + + # Run the algorithm + ... + + def start_controller(self, fl_ctx: FLContext): + pass + + def stop_controller(self, fl_ctx: FLContext): + pass +``` + +We won't do anything fancy during the start and stop phase, so let's focus on the `control_flow` and implement the two steps. To do so, we first need to override the `__init__` method to take a `Config` object as an argument. + +```python +from nvflare.app_opt.p2p.types import Config + +class DistOptController(Controller): + def __init__( + self, + config: Config, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.config = config + + ... +``` + +Now, in the `control_flow` method we can send the local configurations to each client and, once they receive them, ask them to run the algorithm. We'll do so by sending `Task`s to each client. In NVFlare, a `Task` is a piece of work that is assigned by the Controller to client workers. Depending on how the task is assigned (broadcast, send, or relay), the task will be performed by one or more clients. 
+
+In fact, on one hand, we'll use the `send_and_wait` method to send the `"config"` task to each client, since each client will potentially have a different config (because of different neighbors); on the other hand, to run the algorithm, we'll use the `broadcast_and_wait`, which broadcasts the same `"run_algorithm"` task to all clients and waits for all clients to respond/complete the task. As you see, each task specifies a `name` - in this case, `"config"` and `"run_algorithm"` - let's remember those as they'll need to be the same in the control flow of each client.
+
+
+```python
+from nvflare.apis.controller_spec import Task
+from nvflare.apis.dxo import DXO, DataKind
+from nvflare.apis.fl_context import FLContext
+from nvflare.apis.signal import Signal
+
+class DistOptController(Controller):
+
+    ...
+
+    def control_flow(self, abort_signal: Signal, fl_ctx: FLContext):
+        # Send network config (aka neighbors info) to each client
+        for node in self.config.network.nodes:
+            task = Task(
+                name="config",
+                data=DXO(
+                    data_kind=DataKind.APP_DEFINED,
+                    data={"neighbors": [n.__dict__ for n in node.neighbors]},
+                ).to_shareable(),
+            )
+            self.send_and_wait(task=task, targets=[node.id], fl_ctx=fl_ctx)
+
+        # Run algorithm (with extra params if any passed as data)
+        targets = [node.id for node in self.config.network.nodes]
+        self.broadcast_and_wait(
+            task=Task(
+                name="run_algorithm",
+                data=DXO(
+                    data_kind=DataKind.APP_DEFINED,
+                    data={key: value for key, value in self.config.extra.items()},
+                ).to_shareable(),
+            ),
+            targets=targets,
+            min_responses=0,
+            fl_ctx=fl_ctx,
+        )
+
+    ...
+```
+
+And that's it, our `DistOptController` is ready. The complete implementation of the `DistOptController` can be found in `nvflare/app_opt/p2p/controllers/dist_opt_controller.py`.
+
+### The executor
+
+Now that we have our `DistOptController`, it's time to take care of the actual execution of the algorithm at the client level - we'll build on top of the NVFlare `Executor` to do that. 
+
+In NVFlare, an `Executor` is a client-side component that handles tasks received from the controller and executes them. For our purposes we'll need our executor to be able to do a few things:
+- receive the config from the server/controller
+- communicate with its neighbors and send/receive messages to/from them
+- run the algorithm
+
+For the moment, we'll focus on synchronous algorithms only, meaning that the clients will need to run the iterations of an algorithm in a synchronous way. Let's call our executor `SyncAlgorithmExecutor`.
+The only method that must be implemented in this case is the `execute` method, which takes the `task_name` and `shareable` sent from the controller as inputs.
+
+```python
+from nvflare.apis.executor import Executor
+from nvflare.apis.fl_constant import ReturnCode
+from nvflare.apis.fl_context import FLContext
+from nvflare.apis.shareable import Shareable, make_reply
+
+
+class SyncAlgorithmExecutor(Executor):
+
+    def execute(
+        self,
+        task_name: str,
+        shareable: Shareable,
+        fl_ctx: FLContext,
+        abort_signal: Signal,
+    ):
+        if task_name == "config":
+            # TODO: receive and store config
+            ...
+            return make_reply(ReturnCode.OK)
+
+        elif task_name == "run_algorithm":
+            # TODO: run the algorithm
+            return make_reply(ReturnCode.OK)
+        else:
+            self.log_warning(fl_ctx, f"Unknown task name: {task_name}")
+            return make_reply(ReturnCode.TASK_UNKNOWN)
+```
+
+Let's focus on the execution of the `"config"` task - for this we can just create some attributes to store the config and the neighbors (and the local weight computed from them). 
+
+```python
+from nvflare.apis.dxo import from_shareable
+from nvflare.app_opt.p2p.types import LocalConfig, Neighbor
+
+
+class SyncAlgorithmExecutor(Executor):
+    def __init__(self):
+        super().__init__()
+
+        self.config = None
+        self._weight = None
+        self.neighbors: list[Neighbor] = []
+
+
+    def execute(
+        self,
+        task_name: str,
+        shareable: Shareable,
+        fl_ctx: FLContext,
+        abort_signal: Signal,
+    ):
+        if task_name == "config":
+            # Receive and store config
+            self.config = LocalConfig(**from_shareable(shareable).data)
+            self.neighbors = self.config.neighbors
+            self._weight = 1.0 - sum([n.weight for n in self.neighbors])
+            return make_reply(ReturnCode.OK)
+
+        elif task_name == "run_algorithm":
+            # TODO: run the algorithm
+            return make_reply(ReturnCode.OK)
+        else:
+            self.log_warning(fl_ctx, f"Unknown task name: {task_name}")
+            return make_reply(ReturnCode.TASK_UNKNOWN)
+```
+
+That was relatively easy - so, now to the slightly more challenging part of letting clients communicate with each other.
+To do that we'll do a few things:
+- we'll use the `send_aux_request` method to let a client send a message to its neighbors
+- we'll need to register a callback to handle received messages (via the `register_aux_message_handler` function) and add an attribute `neighbors_values` to store received values. We'll call the callback `_handle_neighbor_value` and the registration will be done in the `handle_event` method at start time (i.e., when receiving the `EventType.START_RUN` event). Other events can be handled in the same way if needed. 
+- we'll use threading events and locks to synchronize the execution of the algorithm (making each client, when sending a message, wait to have received the messages of all its neighbors before sending the next message) +- we'll add two methods, `_from_message` and `_to_message` to convert between the message exchange formats (which will need to be overridden in subclasses, based on the algorithm) + +The main message exchange will be done in the `_exchange_values` function. + +```python +import threading +from abc import abstractmethod +from collections import defaultdict + +from nvflare.apis.dxo import DXO, DataKind +from nvflare.apis.event_type import EventType +from nvflare.apis.signal import Signal + + +class SyncAlgorithmExecutor(Executor): + def __init__(self): + super().__init__() + ... # other attributes + + self.neighbors_values = defaultdict(dict) + + self.sync_waiter = threading.Event() + self.lock = threading.Lock() + + + def _exchange_values(self, fl_ctx: FLContext, value: any, iteration: int): + engine = fl_ctx.get_engine() + + # Clear the event before starting the exchange + self.sync_waiter.clear() + + # Send message to neighbors + _ = engine.send_aux_request( + targets=[neighbor.id for neighbor in self.neighbors], + topic="send_value", + request=DXO( + data_kind=DataKind.METRICS, + data={ + "value": self._to_message(value), + "iteration": iteration, + }, + ).to_shareable(), + timeout=10, + fl_ctx=fl_ctx, + ) + + # check if all neighbors sent their values + if len(self.neighbors_values[iteration]) < len(self.neighbors): + # if not, wait for them, max 10 seconds + if not self.sync_waiter.wait(timeout=10): + self.system_panic("failed to receive values from all neighbors", fl_ctx) + return + + def _handle_neighbor_value( + self, topic: str, request: Shareable, fl_ctx: FLContext + ) -> Shareable: + sender = request.get_peer_props()["__identity_name__"] + data = from_shareable(request).data + iteration = data["iteration"] + + with self.lock: + 
self.neighbors_values[iteration][sender] = self._from_message(data["value"]) + # Check if all neighbor values have been received + if len(self.neighbors_values[iteration]) >= len(self.neighbors): + self.sync_waiter.set() # Signal that we have all neighbor values + return make_reply(ReturnCode.OK) + + def handle_event(self, event_type: str, fl_ctx: FLContext): + if event_type == EventType.START_RUN: + engine = fl_ctx.get_engine() + + engine.register_aux_message_handler( + topic="send_value", message_handle_func=self._handle_neighbor_value + ) + + def _to_message(self, x): + return x + + def _from_message(self, x): + return x + +``` + +That's it for the synchronous message exchange. Notice that `neighbors_values` needs to maintain a dictionary of received values per iteration. +This is because, different parts of a network may be at different iterations of the algorithm (plus or minus 1 at most) - this means that I could receive a message from a neighbor valid for iteration `t+1` when I'm still at iteration `t`. Since that message won't be sent again, I need to store it. To avoid the `neighbors_values` to grow indefinitely, we'll delete its content at iteration `t` after having consumed its values and moving to the next iteration in the algorithm. We'll see that in the next section. + +Moving forward, now that we have a way to store the config and exchange messages with the neighbors, we can move on to implementing the algorithmic part. For this base `SyncAlgorithmExecutor`, we'll just implement the main logic in the `execute` method, based on an abstract `run_algorithm` to be overridden by each specific algorithm. + +```python +class SyncAlgorithmExecutor(Executor): + + ... 
+ + def execute( + self, + task_name: str, + shareable: Shareable, + fl_ctx: FLContext, + abort_signal: Signal, + ): + if task_name == "config": + # Receive topology from the server + self._load_config(shareable, fl_ctx) + return make_reply(ReturnCode.OK) + + elif task_name == "run_algorithm": + self.run_algorithm(fl_ctx, shareable, abort_signal) + return make_reply(ReturnCode.OK) + else: + self.log_warning(fl_ctx, f"Unknown task name: {task_name}") + return make_reply(ReturnCode.TASK_UNKNOWN) + + @abstractmethod + def run_algorithm( + self, fl_ctx: FLContext, shareable: Shareable, abort_signal: Signal + ): + """Executes the algorithm""" + pass + + ... +``` + +And that's all. The full implementation is in `nvflare/app_opt/p2p/executors/sync_executor.py` - note that the implementation of the `SyncAlgorithmExecutor` in `nvflare.app_opt.p2p` is a subclass of `BaseDistOptExecutor`, defined in `nvflare/app_opt/p2p/executors/base_dist_opt_executor.py`. It contains a few additional attributes (namely `self.id` and `self.client_name`) to identify the client, which are potentially useful in algorithms, and two additional methods `_pre_algorithm_run` and `_post_algorithm_run` to be overridden by each specific algorithm to execute some code before and after the algorithm execution, respectively. + +### An example: the `ConsensusExecutor` + +Now that we have built all the main foundations, we can easily implement any custom P2P algorithm. For example, let's implement a slightly simplified version of the `ConsensusExecutor` that will be used in the next section and whose full implementation is in `nvflare/app_opt/p2p/executors/consensus.py`. 
+ +```python +import torch + +class ConsensusExecutor(SyncAlgorithmExecutor): + + def __init__( + self, + initial_value: float | None = None, + ): + super().__init__() + if initial_value is None: + initial_value = random.random() + self.current_value = initial_value + self.value_history = [self.current_value] + + def run_algorithm(self, fl_ctx, shareable, abort_signal): + iterations = from_shareable(shareable).data["iterations"] + + for iteration in range(iterations): + # 1. exchange values + self._exchange_values( + fl_ctx, value=self.current_value, iteration=iteration + ) + + # 2. compute new value + current_value = self.current_value * self._weight + for neighbor in self.neighbors: + current_value += ( + self.neighbors_values[iteration][neighbor.id] * neighbor.weight + ) + + # 3. store current value + self.current_value = current_value + + # free memory that's no longer needed + del self.neighbors_values[iteration] + +``` + +As you can see, it's basically just a matter of implementing the algorithm logic in the `run_algorithm` method. Feel free to explore the `nvflare.app_opt.p2p` module to see how other algorithms are implemented. \ No newline at end of file diff --git a/examples/advanced/distributed_optimization/requirements.txt b/examples/advanced/distributed_optimization/requirements.txt new file mode 100644 index 0000000000..47d6aba334 --- /dev/null +++ b/examples/advanced/distributed_optimization/requirements.txt @@ -0,0 +1,8 @@ +omegaconf==2.3.0 +rich==13.9.4 +torch==2.5.1 +torchvision==0.20.1 +matplotlib==3.10.0 +scipy==1.15.0 +scikit-learn==1.6.1 +networkx==3.4.2 \ No newline at end of file diff --git a/nvflare/app_opt/p2p/__init__.py b/nvflare/app_opt/p2p/__init__.py new file mode 100644 index 0000000000..ecc3520aba --- /dev/null +++ b/nvflare/app_opt/p2p/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/nvflare/app_opt/p2p/controllers/__init__.py b/nvflare/app_opt/p2p/controllers/__init__.py new file mode 100644 index 0000000000..9ebd02eda7 --- /dev/null +++ b/nvflare/app_opt/p2p/controllers/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .dist_opt_controller import DistOptController # noqa \ No newline at end of file diff --git a/nvflare/app_opt/p2p/controllers/dist_opt_controller.py b/nvflare/app_opt/p2p/controllers/dist_opt_controller.py new file mode 100644 index 0000000000..e46c077987 --- /dev/null +++ b/nvflare/app_opt/p2p/controllers/dist_opt_controller.py @@ -0,0 +1,72 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nvflare.apis.controller_spec import Task +from nvflare.apis.dxo import DXO, DataKind +from nvflare.apis.fl_context import FLContext +from nvflare.apis.impl.controller import Controller +from nvflare.apis.signal import Signal +from nvflare.app_opt.p2p.types import Config + + +class DistOptController(Controller): + """Controller for running a peer-to-peer distributed optimization algorithm on a network. + + This controller manages the execution of a distributed optimization algorithm by configuring + each client with its neighbors and initiating the algorithm execution across the network. + + Args: + config (Config): The P2P network configuration containing node and neighbor information. 
+ """ + + def __init__( + self, + config: Config, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.config = config + + def control_flow(self, abort_signal: Signal, fl_ctx: FLContext): + # Send network config (aka neighors info) to each client + for node in self.config.network.nodes: + task = Task( + name="config", + data=DXO( + data_kind=DataKind.APP_DEFINED, + data={"neighbors": [n.__dict__ for n in node.neighbors]}, + ).to_shareable(), + ) + self.send_and_wait(task=task, targets=[node.id], fl_ctx=fl_ctx) + + # Run algorithm (with extra params if any passed as data) + targets = [node.id for node in self.config.network.nodes] + self.broadcast_and_wait( + task=Task( + name="run_algorithm", + data=DXO( + data_kind=DataKind.APP_DEFINED, + data={key: value for key, value in self.config.extra.items()}, + ).to_shareable(), + ), + targets=targets, + min_responses=0, + fl_ctx=fl_ctx, + ) + + def start_controller(self, fl_ctx: FLContext): + self.log_info(fl_ctx, "P2PAlgorithmController started") + + def stop_controller(self, fl_ctx: FLContext): + self.log_info(fl_ctx, "P2PAlgorithmController stopped") diff --git a/nvflare/app_opt/p2p/executors/__init__.py b/nvflare/app_opt/p2p/executors/__init__.py new file mode 100644 index 0000000000..3907373056 --- /dev/null +++ b/nvflare/app_opt/p2p/executors/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from .base_dist_opt_executor import BaseDistOptExecutor # noqa +from .sync_executor import SyncAlgorithmExecutor # noqa +from .consensus import ConsensusExecutor # noqa +from .distributed_gradient_descent import DGDExecutor # noqa +from .gradient_tracking import GTExecutor # noqa +from .gtadam import GTADAMExecutor # noqa diff --git a/nvflare/app_opt/p2p/executors/base_dist_opt_executor.py b/nvflare/app_opt/p2p/executors/base_dist_opt_executor.py new file mode 100644 index 0000000000..20338b0f46 --- /dev/null +++ b/nvflare/app_opt/p2p/executors/base_dist_opt_executor.py @@ -0,0 +1,160 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import abstractmethod, ABC + +from nvflare.apis.dxo import from_shareable +from nvflare.apis.event_type import EventType +from nvflare.apis.executor import Executor +from nvflare.apis.fl_constant import ReturnCode +from nvflare.apis.fl_context import FLContext +from nvflare.apis.shareable import Shareable, make_reply +from nvflare.apis.signal import Signal +from nvflare.app_opt.p2p.types import LocalConfig, Neighbor + + +class BaseDistOptExecutor(Executor, ABC): + """Base class for peer-to-peer distributed optimization algorithm executors. + + This class provides the foundational structure for executors that run P2P distributed + optimization algorithms. 
It handles the configuration of the local network, + execution flow based on tasks, and interaction with neighboring clients. + + Attributes: + id (int): Unique identifier for the client. + client_name (str): Name of the client. + config (LocalConfig): Local configuration containing neighbor information. + _weight (float): Weight assigned to the client in the network topology. + neighbors (list[Neighbor]): List of neighboring clients. + """ + + def __init__(self): + super().__init__() + + self.id = None + self.client_name = None + self.config = None + self._weight = None + + self.neighbors: list[Neighbor] = [] + + def execute( + self, + task_name: str, + shareable: Shareable, + fl_ctx: FLContext, + abort_signal: Signal, + ): + if task_name == "config": + # Load local network config + self.config = LocalConfig(**from_shareable(shareable).data) + self.neighbors = self.config.neighbors + self._weight = 1.0 - sum([n.weight for n in self.neighbors]) + return make_reply(ReturnCode.OK) + + elif task_name == "run_algorithm": + # Run the algorithm + self._pre_algorithm_run(fl_ctx, shareable, abort_signal) + self.run_algorithm(fl_ctx, shareable, abort_signal) + self._post_algorithm_run(fl_ctx, shareable, abort_signal) + return make_reply(ReturnCode.OK) + else: + self.log_warning(fl_ctx, f"Unknown task name: {task_name}") + return make_reply(ReturnCode.TASK_UNKNOWN) + + @abstractmethod + def run_algorithm( + self, fl_ctx: FLContext, shareable: Shareable, abort_signal: Signal + ): + """Abstract method to execute the main P2P algorithm. + + Subclasses must implement this method to define the algorithm logic. + """ + pass + + def _pre_algorithm_run( + self, fl_ctx: FLContext, shareable: Shareable, abort_signal: Signal + ): + """Hook method executed before running the main algorithm. + + Can be overridden to perform setup tasks. 
+ """ + pass + + def _post_algorithm_run( + self, fl_ctx: FLContext, shareable: Shareable, abort_signal: Signal + ): + """Hook method executed after running the main algorithm. + + Can be overridden to perform cleanup tasks or save results. + """ + pass + + @abstractmethod + def _exchange_values(self, fl_ctx: FLContext, value: any, *args, **kwargs): + """Abstract method to handle value exchange with neighbors. + + Subclasses must implement this method to define how values are exchanged. + + Args: + fl_ctx (FLContext): Federated learning context. + value (any): The value to exchange. + + """ + pass + + @abstractmethod + def _handle_neighbor_value( + self, topic: str, request: Shareable, fl_ctx: FLContext + ) -> Shareable: + """Abstract method to process incoming values from neighbors. + + Subclasses must implement this method to handle incoming data. + + Args: + topic (str): Topic of the incoming message. + request (Shareable): The incoming data from a neighbor. + fl_ctx (FLContext): Federated learning context. + + Returns: + Shareable: A response message or data to send back. + """ + pass + + def _to_message(self, x): + """Converts a value to a message format suitable for transmission. + + Args: + x (any): The value to convert. + + Returns: + any: The converted message. + """ + return x + + def _from_message(self, x): + """Converts a received message back to its original value format. + + Args: + x (any): The received message. + + Returns: + any: The original value. + """ + return x + + def handle_event(self, event_type: str, fl_ctx: FLContext): + if event_type == EventType.START_RUN: + self.client_name = fl_ctx.get_identity_name() + self.id = int(self.client_name.split("-")[1]) diff --git a/nvflare/app_opt/p2p/executors/consensus.py b/nvflare/app_opt/p2p/executors/consensus.py new file mode 100644 index 0000000000..aad71c6510 --- /dev/null +++ b/nvflare/app_opt/p2p/executors/consensus.py @@ -0,0 +1,78 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import random
+
+import torch
+
+from nvflare.apis.dxo import from_shareable
+from nvflare.app_opt.p2p.executors.sync_executor import SyncAlgorithmExecutor
+
+
+class ConsensusExecutor(SyncAlgorithmExecutor):
+    """An executor that implements a consensus algorithm in a peer-to-peer (P2P) setup.
+
+    This executor extends the SyncAlgorithmExecutor to implement a simple consensus algorithm.
+    The client starts with an initial value and iteratively exchanges values with its neighbors.
+    At each iteration, the client updates its current value based on its own value and the weighted sum
+    of its neighbors' values. The process continues for a specified number of iterations, and the history
+    of values is saved at the end of the run.
+
+    The number of iterations must be provided by the controller when asking to run the algorithm. It can
+    be set in the extra parameters of the controller's config with the "iterations" key.
+
+    Args:
+        initial_value (float, optional): The initial value for the consensus algorithm.
+            If not provided, a random value between 0 and 1 is used.
+
+    Attributes:
+        current_value (float): The current value of the client in the consensus algorithm.
+        value_history (list[float]): A list storing the history of values over iterations. 
+ """ + + def __init__( + self, + initial_value: float | None = None, + ): + super().__init__() + if initial_value is None: + initial_value = random.random() + self.initial_value = initial_value + self.current_value = initial_value + self.value_history = [self.current_value] + + def run_algorithm(self, fl_ctx, shareable, abort_signal): + iterations = from_shareable(shareable).data["iterations"] + + for iteration in range(iterations): + if abort_signal.triggered: + break + + # run algorithm step + # 1. exchange values + self._exchange_values(fl_ctx, value=self.current_value, iteration=iteration) + + # 2. compute new value + current_value = self.current_value * self._weight + for neighbor in self.neighbors: + current_value += self.neighbors_values[iteration][neighbor.id] * neighbor.weight + + # 3. store current value + self.current_value = current_value + self.value_history.append(current_value) + + # free memory that's no longer needed + del self.neighbors_values[iteration] + + def _post_algorithm_run(self, *args, **kwargs): + torch.save(torch.tensor(self.value_history), "value_sequence.pt") diff --git a/nvflare/app_opt/p2p/executors/distributed_gradient_descent.py b/nvflare/app_opt/p2p/executors/distributed_gradient_descent.py new file mode 100644 index 0000000000..f9fb1c12b0 --- /dev/null +++ b/nvflare/app_opt/p2p/executors/distributed_gradient_descent.py @@ -0,0 +1,173 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import time +from abc import abstractmethod + +import torch + +from nvflare.apis.dxo import from_shareable +from nvflare.app_opt.p2p.executors.sync_executor import SyncAlgorithmExecutor +from nvflare.app_opt.p2p.utils.metrics import compute_loss_over_dataset +from nvflare.app_opt.p2p.utils.utils import get_device + + +class DGDExecutor(SyncAlgorithmExecutor): + """An executor that implements Stochastic Distributed Gradient Descent (DGD) in a peer-to-peer (P2P) learning setup. + + Each client maintains its own local model and synchronously exchanges model parameters with its neighbors + at each iteration. The model parameters are updated based on the neighbors' parameters and local gradient descent steps. + The executor also tracks and records training, validation and test losses over time. + + The number of iterations and the learning rate must be provided by the controller when assigning to run the algorithm. + They can be set in the extra parameters of the controller's config with the "iterations" and "stepsize" keys. + + Note: + Subclasses must implement the __init__ method to initialize the model, loss function, and data loaders. + + Args: + model (torch.nn.Module, optional): The neural network model used for training. + loss (torch.nn.modules.loss._Loss, optional): The loss function used for training. + train_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the training dataset. + test_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the testing dataset. + val_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the validation dataset. + + Attributes: + model (torch.nn.Module): The neural network model. + loss (torch.nn.modules.loss._Loss): The loss function. + train_dataloader (torch.utils.data.DataLoader): DataLoader for training data. 
+ test_dataloader (torch.utils.data.DataLoader): DataLoader for testing data. + val_dataloader (torch.utils.data.DataLoader): DataLoader for validation data. + train_loss_sequence (list[tuple]): Records of training loss over time. + test_loss_sequence (list[tuple]): Records of testing loss over time. + """ + + @abstractmethod + def __init__( + self, + model: torch.nn.Module | None = None, + loss: torch.nn.modules.loss._Loss | None = None, + train_dataloader: torch.utils.data.DataLoader | None = None, + test_dataloader: torch.utils.data.DataLoader | None = None, + val_dataloader: torch.utils.data.DataLoader | None = None, + ): + super().__init__() + self.device = get_device() + self.model = model.to(self.device) + self.loss = loss.to(self.device) + self.train_dataloader = train_dataloader + self.test_dataloader = test_dataloader + self.val_dataloader = val_dataloader + + # metrics + self.train_loss_sequence = [] + self.test_loss_sequence = [] + + def run_algorithm(self, fl_ctx, shareable, abort_signal): + start_time = time.time() + iter_dataloader = iter(self.train_dataloader) + + for iteration in range(self._iterations): + self.log_info(fl_ctx, f"iteration: {iteration}/{self._iterations}") + if abort_signal.triggered: + break + + try: + data, label = next(iter_dataloader) + data, label = data.to(self.device), label.to(self.device) + except StopIteration: + # 3. store metrics + current_time = time.time() - start_time + self.train_loss_sequence.append( + ( + current_time, + compute_loss_over_dataset( + self.model, + self.loss, + self.train_dataloader, + device=self.device, + ), + ) + ) + self.test_loss_sequence.append( + ( + current_time, + compute_loss_over_dataset( + self.model, + self.loss, + self.test_dataloader, + device=self.device, + ), + ) + ) + # restart after an epoch + iter_dataloader = iter(self.train_dataloader) + data, label = next(iter_dataloader) + data, label = data.to(self.device), label.to(self.device) + + # run algorithm step + # 1. 
exchange values + with torch.no_grad(): + self._exchange_values( + fl_ctx, value=self.model.parameters(), iteration=iteration + ) + + # compute consensus value + for idx, param in enumerate(self.model.parameters()): + if param.requires_grad: + param.mul_(self._weight) + for neighbor in self.neighbors: + neighbor_param = self.neighbors_values[iteration][ + neighbor.id + ][idx].to(self.device) + param.add_( + neighbor_param, + alpha=neighbor.weight, + ) + # 2. update current value + self.model.zero_grad() + pred = self.model(data) + loss = self.loss(pred, label) + loss.backward() + + with torch.no_grad(): + for param in self.model.parameters(): + if param.grad is not None: + param.add_(param.grad, alpha=-self._stepsize) + + # free memory that's no longer needed + del self.neighbors_values[iteration] + + def _to_message(self, x): + return [param.cpu().numpy() for param in iter(x) if param.requires_grad] + + def _from_message(self, x): + return [torch.from_numpy(param) for param in x] + + def _pre_algorithm_run(self, fl_ctx, shareable, abort_signal): + self._iterations = from_shareable(shareable).data["iterations"] + self._stepsize = from_shareable(shareable).data["stepsize"] + + init_train_loss = compute_loss_over_dataset( + self.model, self.loss, self.train_dataloader, device=self.device + ) + init_test_loss = compute_loss_over_dataset( + self.model, self.loss, self.test_dataloader, device=self.device + ) + + self.train_loss_sequence.append((0, init_train_loss)) + self.test_loss_sequence.append((0, init_test_loss)) + + def _post_algorithm_run(self, *args, **kwargs): + torch.save(torch.tensor(self.train_loss_sequence), "train_loss_sequence.pt") + torch.save(torch.tensor(self.test_loss_sequence), "test_loss_sequence.pt") diff --git a/nvflare/app_opt/p2p/executors/gradient_tracking.py b/nvflare/app_opt/p2p/executors/gradient_tracking.py new file mode 100644 index 0000000000..8272ee1d34 --- /dev/null +++ b/nvflare/app_opt/p2p/executors/gradient_tracking.py @@ -0,0 +1,216 
@@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time +from abc import abstractmethod + +import torch + +from nvflare.apis.dxo import from_shareable +from nvflare.app_opt.p2p.executors.sync_executor import SyncAlgorithmExecutor +from nvflare.app_opt.p2p.utils.metrics import compute_loss_over_dataset +from nvflare.app_opt.p2p.utils.utils import get_device + + +class GTExecutor(SyncAlgorithmExecutor): + """An executor that implements Stochastic Gradient Tracking (GT) in a peer-to-peer (P2P) learning setup. + + Each client maintains its own local model and synchronously exchanges model parameters with its neighbors + at each iteration. The model parameters are updated based on the neighbors' parameters and local gradient descent steps. + The executor also tracks and records training, validation and test losses over time. + + The number of iterations and the learning rate must be provided by the controller when asking to run the algorithm. + They can be set in the extra parameters of the controller's config with the "iterations" and "stepsize" keys. + + Note: + Subclasses must implement the __init__ method to initialize the model, loss function, and data loaders. + + Args: + model (torch.nn.Module, optional): The neural network model used for training. + loss (torch.nn.modules.loss._Loss, optional): The loss function used for training. 
+ train_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the training dataset. + test_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the testing dataset. + val_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the validation dataset. + + Attributes: + model (torch.nn.Module): The neural network model. + loss (torch.nn.modules.loss._Loss): The loss function. + train_dataloader (torch.utils.data.DataLoader): DataLoader for training data. + test_dataloader (torch.utils.data.DataLoader): DataLoader for testing data. + val_dataloader (torch.utils.data.DataLoader): DataLoader for validation data. + train_loss_sequence (list[tuple]): Records of training loss over time. + test_loss_sequence (list[tuple]): Records of testing loss over time. + """ + + @abstractmethod + def __init__( + self, + model: torch.nn.Module | None = None, + loss: torch.nn.modules.loss._Loss | None = None, + train_dataloader: torch.utils.data.DataLoader | None = None, + test_dataloader: torch.utils.data.DataLoader | None = None, + val_dataloader: torch.utils.data.DataLoader | None = None, + ): + super().__init__() + self.device = get_device() + self.model = model.to(self.device) + self.loss = loss.to(self.device) + self.train_dataloader = train_dataloader + self.test_dataloader = test_dataloader + self.val_dataloader = val_dataloader + + # metrics + self.train_loss_sequence = [] + self.test_loss_sequence = [] + + def run_algorithm(self, fl_ctx, shareable, abort_signal): + start_time = time.time() + iter_dataloader = iter(self.train_dataloader) + + for iteration in range(self._iterations): + self.log_info(fl_ctx, f"iteration: {iteration}/{self._iterations}") + if abort_signal.triggered: + break + + try: + data, label = next(iter_dataloader) + data, label = data.to(self.device), label.to(self.device) + except StopIteration: + # 3. 
store metrics + current_time = time.time() - start_time + self.train_loss_sequence.append( + ( + current_time, + compute_loss_over_dataset( + self.model, self.loss, self.train_dataloader, self.device + ), + ) + ) + self.test_loss_sequence.append( + ( + current_time, + compute_loss_over_dataset( + self.model, self.loss, self.test_dataloader, self.device + ), + ) + ) + # restart after an epoch + iter_dataloader = iter(self.train_dataloader) + data, label = next(iter_dataloader) + data, label = data.to(self.device), label.to(self.device) + + # run algorithm step + with torch.no_grad(): + # 1. exchange trainable parameters and tracker + value_to_exchange = { + "parameters": self.model.parameters(), + "tracker": self.tracker, + } + self._exchange_values( + fl_ctx, value=value_to_exchange, iteration=iteration + ) + + # 2. Update trainable parameters + # - a. compute consensus value + for idx, param in enumerate(self.model.parameters()): + if param.requires_grad: + param.mul_(self._weight) + for neighbor in self.neighbors: + neighbor_param = self.neighbors_values[iteration][ + neighbor.id + ]["parameters"][idx].to(self.device) + param.add_( + neighbor_param, + alpha=neighbor.weight, + ) + + # - b. update local parameters + self._update_local_state(self._stepsize) + + # 3. Update tracker + # - a. consensus on tracker + for idx, tracker in enumerate(iter(self.tracker)): + tracker.mul_(self._weight) + for neighbor in self.neighbors: + neighbor_tracker = self.neighbors_values[iteration][ + neighbor.id + ]["tracker"][idx].to(self.device) + tracker.add_( + neighbor_tracker, + alpha=neighbor.weight, + ) + + # -b. compute new gradients + self.model.zero_grad() + pred = self.model(data) + loss = self.loss(pred, label) + loss.backward() + + gradient = [param.grad.clone() for param in self.model.parameters()] + + # - c. 
update tracker + with torch.no_grad(): + for i in range(len(self.tracker)): + self.tracker[i].add_(gradient[i], alpha=1.0) + self.tracker[i].sub_(self.old_gradient[i], alpha=1.0) + + self.old_gradient = [g.clone() for g in gradient] + + # 4. free memory that's no longer needed + del self.neighbors_values[iteration] + + def _update_local_state(self, stepsize): + for idx, param in enumerate(self.model.parameters()): + if param.requires_grad: + param.add_(self.tracker[idx], alpha=-stepsize) + + def _to_message(self, x): + return { + "parameters": [param.cpu().numpy() for param in iter(x["parameters"])], + "tracker": [z.cpu().numpy() for z in iter(x["tracker"])], + } + + def _from_message(self, x): + return { + "parameters": [torch.from_numpy(param) for param in x["parameters"]], + "tracker": [torch.from_numpy(z) for z in x["tracker"]], + } + + def _pre_algorithm_run(self, fl_ctx, shareable, abort_signal): + data = from_shareable(shareable).data + self._iterations = data["iterations"] + self._stepsize = data["stepsize"] + + init_train_loss = compute_loss_over_dataset( + self.model, self.loss, self.train_dataloader, self.device + ) + init_test_loss = compute_loss_over_dataset( + self.model, self.loss, self.test_dataloader, self.device + ) + + self.train_loss_sequence.append((0, init_train_loss)) + self.test_loss_sequence.append((0, init_test_loss)) + + # initialize tracker + self.old_gradient = [ + torch.zeros_like(param, device=self.device) + for param in self.model.parameters() + ] + self.tracker = [ + torch.zeros_like(param, device=self.device) + for param in self.model.parameters() + ] + + def _post_algorithm_run(self, *args, **kwargs): + torch.save(torch.tensor(self.train_loss_sequence), "train_loss_sequence.pt") + torch.save(torch.tensor(self.test_loss_sequence), "test_loss_sequence.pt") diff --git a/nvflare/app_opt/p2p/executors/gtadam.py b/nvflare/app_opt/p2p/executors/gtadam.py new file mode 100644 index 0000000000..868c39479f --- /dev/null +++ 
b/nvflare/app_opt/p2p/executors/gtadam.py @@ -0,0 +1,74 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from nvflare.apis.dxo import from_shareable +from nvflare.app_opt.p2p.executors.gradient_tracking import GTExecutor + + +class GTADAMExecutor(GTExecutor): + """An executor that implements GTAdam in a peer-to-peer (P2P) learning setup. + + Each client maintains its own local model and synchronously exchanges model parameters with its neighbors + at each iteration. The model parameters are updated based on the neighbors' parameters and local gradient descent steps. + The executor also tracks and records training, validation and test losses over time. + + The number of iterations, the learning rate and the beta1, beta2 and epsilon hyperparameters must be provided + by the controller when asking to run the algorithm. They can be set in the extra parameters of the controller's + config with the "iterations", "stepsize", "beta1", "beta2", and "epsilon" keys. + + Note: + Subclasses must implement the __init__ method to initialize the model, loss function, and data loaders. + + Args: + model (torch.nn.Module, optional): The neural network model used for training. + loss (torch.nn.modules.loss._Loss, optional): The loss function used for training. + train_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the training dataset. 
+ test_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the testing dataset. + val_dataloader (torch.utils.data.DataLoader, optional): DataLoader for the validation dataset. + + Attributes: + model (torch.nn.Module): The neural network model. + loss (torch.nn.modules.loss._Loss): The loss function. + train_dataloader (torch.utils.data.DataLoader): DataLoader for training data. + test_dataloader (torch.utils.data.DataLoader): DataLoader for testing data. + val_dataloader (torch.utils.data.DataLoader): DataLoader for validation data. + train_loss_sequence (list[tuple]): Records of training loss over time. + test_loss_sequence (list[tuple]): Records of testing loss over time. + """ + def _pre_algorithm_run(self, fl_ctx, shareable, abort_signal): + super()._pre_algorithm_run(fl_ctx, shareable, abort_signal) + + data = from_shareable(shareable).data + self.beta1 = data["beta1"] + self.beta2 = data["beta2"] + self.epsilon = data["epsilon"] + self.G = torch.tensor(1e6, device=self.device) + self.m = [torch.zeros_like(param, device=self.device) for param in self.model.parameters()] + self.v = [torch.zeros_like(param, device=self.device) for param in self.model.parameters()] + + def _update_local_state(self, stepsize): + for i in range(len(self.tracker)): + self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * self.tracker[i] + self.v[i] = torch.minimum( + self.beta2 * self.v[i] + (1 - self.beta2) * self.tracker[i] ** 2, self.G + ) + + with torch.no_grad(): + for idx, param in enumerate(self.model.parameters()): + if param.requires_grad: + descent = torch.divide( + self.m[idx], torch.sqrt(self.v[idx] + self.epsilon) + ) + param.add_(descent, alpha=-stepsize) diff --git a/nvflare/app_opt/p2p/executors/sync_executor.py b/nvflare/app_opt/p2p/executors/sync_executor.py new file mode 100644 index 0000000000..ed40b4fdf2 --- /dev/null +++ b/nvflare/app_opt/p2p/executors/sync_executor.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import threading +from collections import defaultdict + +from nvflare.apis.dxo import DXO, DataKind, from_shareable +from nvflare.apis.event_type import EventType +from nvflare.apis.fl_constant import ReturnCode +from nvflare.apis.fl_context import FLContext +from nvflare.apis.shareable import Shareable, make_reply +from nvflare.app_opt.p2p.executors.base_dist_opt_executor import BaseDistOptExecutor + + +class SyncAlgorithmExecutor(BaseDistOptExecutor): + """An executor to implement synchronous peer-to-peer (P2P) algorithms. + + This executor extends the BaseP2PAlgorithmExecutor to support synchronous execution + of P2P algorithms. It manages the exchange of values with neighboring clients and ensures + synchronization at each iteration. + + Args: + sync_timeout (int): The timeout for waiting for values from neighbors. Defaults to 10 seconds. + + Attributes: + neighbors_values (defaultdict): A dictionary to store values received from neighbors, + keyed by iteration and neighbor ID. + sync_waiter (threading.Event): An event to synchronize the exchange of values. + lock (threading.Lock): A lock to manage concurrent access to shared data structures. 
+ """ + def __init__(self, sync_timeout: int = 10): + super().__init__() + + self.neighbors_values = defaultdict(dict) + + self.sync_timeout = sync_timeout + self.sync_waiter = threading.Event() + self.lock = threading.Lock() + + def _exchange_values(self, fl_ctx: FLContext, value: any, iteration: int): + """Exchanges values with neighbors synchronously. + + Sends the local value to all neighbors and waits for their values for the current iteration. + Utilizes threading events to synchronize the exchange and ensure all values are received + before proceeding. + + Args: + fl_ctx (FLContext): Federated learning context. + value (any): The local value to send to neighbors. + iteration (int): The current iteration number of the algorithm. + + Raises: + SystemExit: If the values from all neighbors are not received within the timeout. + """ + engine = fl_ctx.get_engine() + + # Clear the event before starting the exchange + self.sync_waiter.clear() + + _ = engine.send_aux_request( + targets=[neighbor.id for neighbor in self.neighbors], + topic="send_value", + request=DXO( + data_kind=DataKind.WEIGHTS, + data={ + "value": self._to_message(value), + "iteration": iteration, + }, + ).to_shareable(), + timeout=10, + fl_ctx=fl_ctx, + ) + + # check if neighbors already sent their values + if len(self.neighbors_values[iteration]) < len(self.neighbors): + # wait for all neighbors to send their values for the current iteration + # if not received after timeout, abort the job + if not self.sync_waiter.wait(timeout=self.sync_timeout): + self.system_panic("failed to receive values from all neighbors", fl_ctx) + return + + def _handle_neighbor_value( + self, topic: str, request: Shareable, fl_ctx: FLContext + ) -> Shareable: + """Handles incoming values from neighbors. + + Processes the received value from a neighbor, stores it, and signals when all neighbor + values for the current iteration have been received. + + Args: + topic (str): Topic of the incoming message. 
+ request (Shareable): The message containing the neighbor's value. + fl_ctx (FLContext): Federated learning context. + + Returns: + Shareable: A reply message indicating successful reception. + """ + sender = request.get_peer_props()["__identity_name__"] + data = from_shareable(request).data + iteration = data["iteration"] + + with self.lock: + # Store the received value in the neighbors_values dictionary + self.neighbors_values[iteration][sender] = self._from_message(data["value"]) + # Check if all neighbor values have been received for the iteration + if len(self.neighbors_values[iteration]) >= len(self.neighbors): + self.sync_waiter.set() # Signal that we have all neighbor values + return make_reply(ReturnCode.OK) + + def handle_event(self, event_type: str, fl_ctx: FLContext): + super().handle_event(event_type, fl_ctx) + if event_type == EventType.START_RUN: + engine = fl_ctx.get_engine() + + # Register the message handler for receiving neighbor values + engine.register_aux_message_handler( + topic="send_value", message_handle_func=self._handle_neighbor_value + ) diff --git a/nvflare/app_opt/p2p/types/__init__.py b/nvflare/app_opt/p2p/types/__init__.py new file mode 100644 index 0000000000..7f9815a171 --- /dev/null +++ b/nvflare/app_opt/p2p/types/__init__.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from dataclasses import dataclass, field + + +@dataclass +class Neighbor: + """Represents a neighbor in the network. + + Each neighbor is characterized by an identifier and an optional weight, + which can be used in algorithms that require weighted interactions between nodes. + """ + + id: int | str + weight: float | None = None + + @property + def __dict__(self): + return {"id": self.id, "weight": self.weight} + + +@dataclass +class Node: + """Represents a node in the network. + + A node contains an identifier and a list of its neighbors. + During initialization, any neighbor specified as a dictionary is converted + into a `Neighbor` instance to ensure consistent data structures. + """ + + id: int | str | None = None + neighbors: list[Neighbor] = field(default_factory=list) + + @property + def __dict__(self): + return { + "id": self.id, + "neighbors": [neighbor.__dict__ for neighbor in self.neighbors], + } + + def __post_init__(self): + new_neighbors = [] + for neighbor in self.neighbors: + if isinstance(neighbor, dict): + new_neighbors.append(Neighbor(**neighbor)) + else: + new_neighbors.append(neighbor) + self.neighbors = new_neighbors + + +@dataclass +class Network: + """Represents a network consisting of multiple nodes. + + The network defines the topology over which the peer-to-peer algorithm operates. + During initialization, any node specified as a dictionary is converted into a `Node` instance. + """ + + nodes: list[Node] = field(default_factory=list) + + @property + def __dict__(self): + return {"nodes": [node.__dict__ for node in self.nodes]} + + def __post_init__(self): + new_nodes = [] + for node in self.nodes: + if isinstance(node, dict): + new_nodes.append(Node(**node)) + else: + new_nodes.append(node) + self.nodes = new_nodes + + +@dataclass +class Config: + """Represents the configuration for the peer-to-peer algorithm. + + The configuration includes the network topology and any extra algorithm-specific parameters. 
+ Algorithm-specific parameters in `extra` can include settings like 'stepsize', 'iterations', etc. + """ + network: Network + + extra: dict = field(default_factory=dict) + + @property + def __dict__(self): + return {"extra": self.extra, "network": self.network.__dict__} + + def __post_init__(self): + if isinstance(self.network, dict): + self.network = Network(**self.network) + + +@dataclass +class LocalConfig: + """Represents the local configuration for a node in the network. + + The local configuration includes the node's neighbors and any extra algorithm-specific parameters. + Algorithm-specific parameters in `extra` can include settings like 'stepsize', 'iterations', etc. + """ + neighbors: list[Neighbor] + + extra: dict = field(default_factory=dict) + + @property + def __dict__(self): + return {"neighbors": [neighbor.__dict__ for neighbor in self.neighbors], "extra": self.extra} + + def __post_init__(self): + new_neighbors = [] + for neighbor in self.neighbors: + if isinstance(neighbor, dict): + new_neighbors.append(Neighbor(**neighbor)) + else: + new_neighbors.append(neighbor) + self.neighbors = new_neighbors diff --git a/nvflare/app_opt/p2p/utils/__init__.py b/nvflare/app_opt/p2p/utils/__init__.py new file mode 100644 index 0000000000..ecc3520aba --- /dev/null +++ b/nvflare/app_opt/p2p/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
\ No newline at end of file diff --git a/nvflare/app_opt/p2p/utils/config_generator.py b/nvflare/app_opt/p2p/utils/config_generator.py new file mode 100644 index 0000000000..0106fc31ae --- /dev/null +++ b/nvflare/app_opt/p2p/utils/config_generator.py @@ -0,0 +1,65 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import numpy as np + +from nvflare.app_opt.p2p.types import Neighbor, Network, Node +from nvflare.app_opt.p2p.utils.topology import doubly_stochastic_adjacency + + +def generate_random_network( + num_clients: int, + seed: int = 42, + connection_probability: float = 0.3, +) -> Network: + """Generate a random configuration for the given number of clients. + The configuration includes the number of iterations, the network topology, + and the initial values for each node. + + Args: + num_clients (int): The number of clients in the network. + + Returns: + BaseConfig: The generated configuration. + np.ndarray: The weighted adjacency matrix of the network. 
+ """ + np.random.seed(seed=seed) + + while True: + graph = nx.gnp_random_graph(num_clients, p=connection_probability) + if nx.is_connected(graph): + break + adjacency_matrix = nx.adjacency_matrix(graph) + np.eye(num_clients) + weighted_adjacency_matrix = doubly_stochastic_adjacency(graph) + + network = [] + for j in range(num_clients): + in_neighbors = np.nonzero(adjacency_matrix[:, j])[0].tolist() + in_weights = weighted_adjacency_matrix[:, j].tolist() + + neighbors = [ + Neighbor(id=f"site-{i + 1}", weight=in_weights[i]) + for i in in_neighbors + if i != j + ] + + network.append(Node( + id=f"site-{j + 1}", + neighbors=neighbors, + )) + + config = Network( + nodes=network, + ) + return config, weighted_adjacency_matrix diff --git a/nvflare/app_opt/p2p/utils/metrics.py b/nvflare/app_opt/p2p/utils/metrics.py new file mode 100644 index 0000000000..f50240a439 --- /dev/null +++ b/nvflare/app_opt/p2p/utils/metrics.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +def compute_loss_over_dataset( + model: torch.nn.Module | None = None, + loss: torch.nn.modules.loss._Loss | None = None, + dataloader: torch.utils.data.DataLoader | None = None, + device: torch.device | None = None, +) -> float: + """ + Compute the average loss over a dataset. + + Args: + model: The model to use for predictions. + loss: The loss function to use. + dataloader: The dataloader for the dataset. 
+ device: The device to use for computation. + + Returns: + The average loss over the dataset. + """ + # Check if all required arguments are provided + if model is None or loss is None or dataloader is None: + raise ValueError("All arguments (model, loss, dataloader) must be provided.") + + model.eval() + epoch_loss = 0 + with torch.no_grad(): + # Iterate over the dataloader + for x, y in dataloader: + # Move data to the specified device + x, y = x.to(device), y.to(device) + # Make predictions + pred = model(x) + # Compute the loss + ls = loss(pred, y) + # Accumulate the loss + epoch_loss += ls.item() * x.size(0) + # Return the average loss + return epoch_loss / len(dataloader.dataset) diff --git a/nvflare/app_opt/p2p/utils/topology.py b/nvflare/app_opt/p2p/utils/topology.py new file mode 100644 index 0000000000..c69f37486f --- /dev/null +++ b/nvflare/app_opt/p2p/utils/topology.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import networkx as nx +import torch + + +def doubly_stochastic_adjacency(graph: nx.Graph) -> torch.Tensor: + """Using Metropolis-Hastings algorithm to compute a doubly stochastic adjacency matrix.""" + num_agents = len(graph.nodes()) + binary_adjacency_matrix = torch.from_numpy(nx.to_numpy_array(graph)).float() + degree = torch.sum(binary_adjacency_matrix, dim=0) + W = torch.zeros((num_agents, num_agents)) + for i in range(num_agents): + N_i = torch.nonzero(binary_adjacency_matrix[i, :]) + for j in N_i: + W[i, j] = 1 / (1 + max(degree[i], degree[j])) + W[i, i] = 1 - torch.sum(W[i, :]) + return W + + +def get_matrix_steady_state(A: torch.Tensor): + """Get the steady state of a matrix via eigendecomposition""" + eigenvalues, eigenvectors = torch.linalg.eig( + A + ) # unnormalized and unordered eigenvalues and eigenvectors + idx = eigenvalues.float().argsort(descending=True) + eigenvectors = eigenvectors.float()[:, idx] + + steady_state = eigenvectors[:, 0] / torch.sum(eigenvectors[:, 0]) + return steady_state diff --git a/nvflare/app_opt/p2p/utils/utils.py b/nvflare/app_opt/p2p/utils/utils.py new file mode 100644 index 0000000000..a82bc6d05d --- /dev/null +++ b/nvflare/app_opt/p2p/utils/utils.py @@ -0,0 +1,9 @@ +import torch + +def get_device(): + if torch.cuda.is_available(): + return torch.device("cuda") + elif torch.backends.mps.is_available(): + return torch.device("mps") + else: + return torch.device("cpu") \ No newline at end of file