Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate DockerBuilder and DockerLauncherBuilder #3186

Merged
merged 9 commits into from
Feb 3, 2025
7 changes: 6 additions & 1 deletion nvflare/lighter/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ class TemplateSectionKey:
START_CLIENT_SH = "start_cln_sh"
DOCKER_BUILD_SH = "docker_build_sh"
DOCKER_SERVER_SH = "docker_svr_sh"
DOCKER_LAUNCHER_SERVER_SH = "docker_launcher_svr_sh"
DOCKER_CLIENT_SH = "docker_cln_sh"
DOCKER_LAUNCHER_CLIENT_SH = "docker_launcher_cln_sh"
DOCKER_ADMIN_SH = "docker_adm_sh"
GUNICORN_CONF_PY = "gunicorn_conf_py"
START_OVERSEER_SH = "start_ovsr_sh"
Expand All @@ -111,6 +113,7 @@ class TemplateSectionKey:
FED_ADMIN = "fed_admin"
COMPOSE_YAML = "compose_yaml"
DOCKERFILE = "dockerfile"
LAUNCHER_DOCKERFILE = "launcher_dockerfile"
HELM_CHART_CHART = "helm_chart_chart"
HELM_CHART_VALUES = "helm_chart_values"
HELM_CHART_SERVICE_OVERSEER = "helm_chart_service_overseer"
Expand All @@ -124,7 +127,8 @@ class ProvFileName:
SUB_START_SH = "sub_start.sh"
PRIVILEGE_YML = "privilege.yml"
DOCKER_BUILD_SH = "docker_build.sh"
DOCKER_SH = "start_docker.sh"
DOCKER_SH = "docker.sh"
DOCKER_LAUNCHER_SH = "docker_launcher.sh"
GUNICORN_CONF_PY = "gunicorn.conf.py"
FED_SERVER_JSON = "fed_server.json"
FED_CLIENT_JSON = "fed_client.json"
Expand All @@ -142,6 +146,7 @@ class ProvFileName:
ENV = ".env"
COMPOSE_BUILD_DIR = "nvflare_compose"
DOCKERFILE = "Dockerfile"
LAUNCHER_DOCKERFILE = "Dockerfile.launcher"
REQUIREMENTS_TXT = "requirements.txt"
SERVER_CONTEXT_TENSEAL = "server_context.tenseal"
CLIENT_CONTEXT_TENSEAL = "client_context.tenseal"
Expand Down
70 changes: 4 additions & 66 deletions nvflare/lighter/impl/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,18 @@
# limitations under the License.

import copy
import json
import os
import shutil

import yaml

from nvflare.app_opt.job_launcher.docker_launcher import ClientDockerJobLauncher, ServerDockerJobLauncher
from nvflare.lighter import utils
from nvflare.lighter.constants import CtxKey, PropKey, ProvFileName, TemplateSectionKey
from nvflare.lighter.constants import CtxKey, ProvFileName, TemplateSectionKey
from nvflare.lighter.spec import Builder, Project, ProvisionContext


class DockerBuilder(Builder):
def __init__(
self, docker_image="nvflare-docker:0.0.1", base_image="python:3.8", requirements_file="requirements.txt"
):
def __init__(self, base_image="python:3.8", requirements_file="requirements.txt"):
"""Build docker compose file."""
self.docker_image = docker_image
self.base_image = base_image
self.requirements_file = requirements_file
self.services = {}
Expand Down Expand Up @@ -63,31 +57,7 @@ def _build_server(self, server, ctx: ProvisionContext):
info_dict["container_name"] = server.name
self.services[server.name] = info_dict

# local folder creation
dest_dir = ctx.get_local_dir(server)
with open(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), "rt") as f:
resources = json.load(f)
resources["components"].append(
{
"id": "docker_launcher",
"path": ServerDockerJobLauncher().__module__ + "." + "ServerDockerJobLauncher",
"args": {},
}
)
utils.write(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), json.dumps(resources, indent=4), "t")

communication_port = server.get_prop(CtxKey.DOCKER_COMM_PORT)
if communication_port:
replacement_dict = {"comm_host_name": "server-parent", "communication_port": communication_port}
ctx.build_from_template(
dest_dir,
TemplateSectionKey.COMM_CONFIG,
ProvFileName.COMM_CONFIG,
replacement=replacement_dict,
exe=True,
)

def _build_client(self, client, ctx: ProvisionContext):
def _build_client(self, client):
info_dict = copy.deepcopy(self.services["__flclient__"])
info_dict["volumes"] = [f"./{client.name}:" + "${WORKSPACE}"]
info_dict["build"] = "nvflare_compose"
Expand All @@ -101,30 +71,6 @@ def _build_client(self, client, ctx: ProvisionContext):
info_dict["container_name"] = client.name
self.services[client.name] = info_dict

# local folder creation
dest_dir = ctx.get_local_dir(client)
with open(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), "rt") as f:
resources = json.load(f)
resources["components"].append(
{
"id": "docker_launcher",
"path": ClientDockerJobLauncher().__module__ + "." + "ClientDockerJobLauncher",
"args": {},
}
)
utils.write(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), json.dumps(resources, indent=4), "t")

communication_port = client.get_prop(PropKey.DOCKER_COMM_PORT)
if communication_port:
replacement_dict = {"comm_host_name": client.name + "-parent", "communication_port": communication_port}
ctx.build_from_template(
dest_dir,
TemplateSectionKey.COMM_CONFIG,
ProvFileName.COMM_CONFIG,
replacement=replacement_dict,
exe=True,
)

def build(self, project: Project, ctx: ProvisionContext):
compose = ctx.yaml_load_template_section(TemplateSectionKey.COMPOSE_YAML)
self.services = compose.get("services")
Expand All @@ -137,7 +83,7 @@ def build(self, project: Project, ctx: ProvisionContext):
self._build_server(server, ctx)

for client in project.get_clients():
self._build_client(client, ctx)
self._build_client(client)

self.services.pop("__overseer__", None)
self.services.pop("__flserver__", None)
Expand All @@ -155,14 +101,6 @@ def build(self, project: Project, ctx: ProvisionContext):
with open(os.path.join(compose_build_dir, ProvFileName.DOCKERFILE), "wt") as f:
f.write(f"FROM {self.base_image}\n")
f.write(ctx.get_template_section(TemplateSectionKey.DOCKERFILE))
replacement_dict = {"image": self.docker_image}
ctx.build_from_template(
compose_build_dir,
TemplateSectionKey.DOCKER_BUILD_SH,
ProvFileName.DOCKER_BUILD_SH,
replacement=replacement_dict,
exe=True,
)
try:
shutil.copyfile(self.requirements_file, os.path.join(compose_build_dir, ProvFileName.REQUIREMENTS_TXT))
except Exception:
Expand Down
210 changes: 210 additions & 0 deletions nvflare/lighter/impl/docker_launcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import json
import os
import shutil

import yaml

from nvflare.app_opt.job_launcher.docker_launcher import ClientDockerJobLauncher, ServerDockerJobLauncher
yhwen marked this conversation as resolved.
Show resolved Hide resolved
from nvflare.lighter import utils
from nvflare.lighter.constants import CtxKey, PropKey, ProvFileName, TemplateSectionKey
from nvflare.lighter.spec import Builder, Project, ProvisionContext


class DockerLauncherBuilder(Builder):
yhwen marked this conversation as resolved.
Show resolved Hide resolved
"""DockerLauncherBuilder is used for generating the docker build command and service startup command for using the
DockerJobLauncher as the job launcher (both server and client).

"""

def __init__(
self, docker_image="nvflare-docker:0.0.1", base_image="python:3.8", requirements_file="requirements.txt"
):
"""Build docker compose file."""
self.docker_image = docker_image
self.base_image = base_image
self.requirements_file = requirements_file
self.services = {}
self.compose_file_path = None

def _build_overseer(self, overseer):
protocol = overseer.props.get("protocol", "http")
default_port = "443" if protocol == "https" else "80"
port = overseer.props.get("port", default_port)
info_dict = copy.deepcopy(self.services["__overseer__"])
info_dict["volumes"] = [f"./{overseer.name}:" + "${WORKSPACE}"]
info_dict["ports"] = [f"{port}:{port}"]
info_dict["build"] = "nvflare_compose"
info_dict["container_name"] = overseer.name
self.services[overseer.name] = info_dict

def _build_server(self, server, ctx: ProvisionContext):
fed_learn_port = ctx.get(CtxKey.FED_LEARN_PORT)
admin_port = ctx.get(CtxKey.ADMIN_PORT)

info_dict = copy.deepcopy(self.services["__flserver__"])
info_dict["volumes"][0] = f"./{server.name}:" + "${WORKSPACE}"
info_dict["ports"] = [f"{fed_learn_port}:{fed_learn_port}", f"{admin_port}:{admin_port}"]
info_dict["build"] = "nvflare_compose"
for i in range(len(info_dict["command"])):
if info_dict["command"][i] == "flserver":
info_dict["command"][i] = server.name
if info_dict["command"][i] == "org=__org_name__":
info_dict["command"][i] = f"org={server.org}"
info_dict["container_name"] = server.name
self.services[server.name] = info_dict

# local folder creation
dest_dir = ctx.get_local_dir(server)
with open(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), "rt") as f:
resources = json.load(f)
resources["components"].append(
{
"id": "docker_launcher",
"path": ServerDockerJobLauncher().__module__ + "." + "ServerDockerJobLauncher",
"args": {},
}
)
utils.write(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), json.dumps(resources, indent=4), "t")

communication_port = server.get_prop(CtxKey.DOCKER_COMM_PORT)
if communication_port:
replacement_dict = {"comm_host_name": "server-parent", "communication_port": communication_port}
ctx.build_from_template(
dest_dir,
TemplateSectionKey.COMM_CONFIG,
ProvFileName.COMM_CONFIG,
replacement=replacement_dict,
exe=True,
)

dest_dir = ctx.get_kit_dir(server)
replacement_dict = {
"admin_port": admin_port,
"fed_learn_port": fed_learn_port,
"comm_host_name": "server-parent",
"communication_port": communication_port,
"docker_image": self.docker_image,
}
ctx.build_from_template(
dest_dir,
TemplateSectionKey.DOCKER_LAUNCHER_SERVER_SH,
ProvFileName.DOCKER_LAUNCHER_SH,
replacement=replacement_dict,
exe=True,
)

def _build_client(self, client, ctx: ProvisionContext):
fed_learn_port = ctx.get(CtxKey.FED_LEARN_PORT)
admin_port = ctx.get(CtxKey.ADMIN_PORT)

info_dict = copy.deepcopy(self.services["__flclient__"])
info_dict["volumes"] = [f"./{client.name}:" + "${WORKSPACE}"]
info_dict["build"] = "nvflare_compose"
for i in range(len(info_dict["command"])):
if info_dict["command"][i] == "flclient":
info_dict["command"][i] = client.name
if info_dict["command"][i] == "uid=__flclient__":
info_dict["command"][i] = f"uid={client.name}"
if info_dict["command"][i] == "org=__org_name__":
info_dict["command"][i] = f"org={client.org}"
info_dict["container_name"] = client.name
self.services[client.name] = info_dict

# local folder creation
dest_dir = ctx.get_local_dir(client)
with open(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), "rt") as f:
resources = json.load(f)
resources["components"].append(
{
"id": "docker_launcher",
"path": ClientDockerJobLauncher().__module__ + "." + "ClientDockerJobLauncher",
"args": {},
}
)
utils.write(os.path.join(dest_dir, ProvFileName.RESOURCES_JSON_DEFAULT), json.dumps(resources, indent=4), "t")

communication_port = client.get_prop(PropKey.DOCKER_COMM_PORT)
if communication_port:
replacement_dict = {"comm_host_name": client.name + "-parent", "communication_port": communication_port}
ctx.build_from_template(
dest_dir,
TemplateSectionKey.COMM_CONFIG,
ProvFileName.COMM_CONFIG,
replacement=replacement_dict,
exe=True,
)

dest_dir = ctx.get_kit_dir(client)
replacement_dict = {
"admin_port": admin_port,
"fed_learn_port": fed_learn_port,
"comm_host_name": "server-parent",
"communication_port": communication_port,
"docker_image": self.docker_image,
}
ctx.build_from_template(
dest_dir,
TemplateSectionKey.DOCKER_LAUNCHER_CLIENT_SH,
ProvFileName.DOCKER_LAUNCHER_SH,
replacement=replacement_dict,
exe=True,
)

def build(self, project: Project, ctx: ProvisionContext):
compose = ctx.yaml_load_template_section(TemplateSectionKey.COMPOSE_YAML)
self.services = compose.get("services")
self.compose_file_path = os.path.join(ctx.get_wip_dir(), ProvFileName.COMPOSE_YAML)
overseer = project.get_overseer()
if overseer:
self._build_overseer(overseer)
server = project.get_server()
if server:
self._build_server(server, ctx)

for client in project.get_clients():
self._build_client(client, ctx)

self.services.pop("__overseer__", None)
self.services.pop("__flserver__", None)
self.services.pop("__flclient__", None)
compose["services"] = self.services
with open(self.compose_file_path, "wt") as f:
yaml.dump(compose, f)
env_file_path = os.path.join(ctx.get_wip_dir(), ProvFileName.ENV)
with open(env_file_path, "wt") as f:
f.write("WORKSPACE=/workspace\n")
f.write("PYTHON_EXECUTABLE=/usr/local/bin/python3\n")
f.write("IMAGE_NAME=nvflare-service\n")
compose_build_dir = os.path.join(ctx.get_wip_dir(), ProvFileName.COMPOSE_BUILD_DIR)
os.makedirs(compose_build_dir, exist_ok=True)
with open(os.path.join(compose_build_dir, ProvFileName.LAUNCHER_DOCKERFILE), "wt") as f:
f.write(f"FROM {self.base_image}\n")
f.write(ctx.get_template_section(TemplateSectionKey.LAUNCHER_DOCKERFILE))
replacement_dict = {"image": self.docker_image}
ctx.build_from_template(
compose_build_dir,
TemplateSectionKey.DOCKER_BUILD_SH,
ProvFileName.DOCKER_BUILD_SH,
replacement=replacement_dict,
exe=True,
)
try:
shutil.copyfile(self.requirements_file, os.path.join(compose_build_dir, ProvFileName.REQUIREMENTS_TXT))
except Exception:
f = open(os.path.join(compose_build_dir, ProvFileName.REQUIREMENTS_TXT), "wt")
f.close()
Loading
Loading