Skip to content

Commit 108a0f1

Browse files
committed
Build requirements as native dependencies
1 parent 99b8e9e commit 108a0f1

File tree

4 files changed

+381
-18
lines changed

4 files changed

+381
-18
lines changed

src/datacustomcode/deploy.py

Lines changed: 15 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,11 @@ def create_deployment(
140140
raise
141141

142142

143-
DOCKER_IMAGE_NAME = "datacloud-custom-code"
144-
DEPENDENCIES_ARCHIVE_NAME = "dependencies.tar.gz"
143+
PLATFORM_ENV_VAR = "DOCKER_DEFAULT_PLATFORM=linux/amd64"
144+
DOCKER_IMAGE_NAME = "datacloud-custom-code-dependency-builder"
145+
DEPENDENCIES_ARCHIVE_NAME = "native_dependencies"
146+
DEPENDENCIES_ARCHIVE_FULL_NAME = f"{DEPENDENCIES_ARCHIVE_NAME}.tar.gz"
147+
DEPENDENCIES_ARCHIVE_PATH = os.path.join("payload", "archives", DEPENDENCIES_ARCHIVE_FULL_NAME)
145148
ZIP_FILE_NAME = "deployment.zip"
146149

147150

@@ -150,31 +153,25 @@ def prepare_dependency_archive(directory: str) -> None:
150153
image_exists = cmd_output(cmd)
151154

152155
if not image_exists:
153-
logger.debug("Building docker image...")
154-
cmd = f"docker build -t {DOCKER_IMAGE_NAME} ."
156+
logger.info("Building docker image...")
157+
cmd = f"{PLATFORM_ENV_VAR} docker build -t {DOCKER_IMAGE_NAME} -f Dockerfile.dependencies ."
155158
cmd_output(cmd)
156159

157160
with tempfile.TemporaryDirectory() as temp_dir:
161+
logger.info("Building dependencies archive")
158162
shutil.copy("requirements.txt", temp_dir)
163+
shutil.copy("build_native_dependencies.sh", temp_dir)
159164
cmd = (
160-
f"docker run --rm "
161-
f"-v {temp_dir}:/dependencies "
165+
f"{PLATFORM_ENV_VAR} docker run --rm "
166+
f"-v {temp_dir}:/workspace "
162167
f"{DOCKER_IMAGE_NAME} "
163-
f'/bin/bash -c "cd /dependencies && pip download -r requirements.txt"'
168+
f'/bin/bash -c "./build_native_dependencies.sh"'
164169
)
165170
cmd_output(cmd)
171+
archives_temp_path = os.path.join(temp_dir, DEPENDENCIES_ARCHIVE_FULL_NAME)
172+
shutil.copy(archives_temp_path, DEPENDENCIES_ARCHIVE_PATH)
166173

167-
archives_dir = os.path.join(directory, "archives")
168-
os.makedirs(archives_dir, exist_ok=True)
169-
archive_file = os.path.join(archives_dir, DEPENDENCIES_ARCHIVE_NAME)
170-
with tarfile.open(archive_file, "w:gz") as tar:
171-
for file in os.listdir(temp_dir):
172-
# Exclude requirements.txt from the archive
173-
if file == "requirements.txt":
174-
continue
175-
tar.add(os.path.join(temp_dir, file), arcname=file)
176-
177-
logger.debug(f"Dependencies downloaded and archived to {archive_file}")
174+
logger.info(f"Dependencies archived to {DEPENDENCIES_ARCHIVE_PATH}")
178175

179176

180177
class DeploymentsResponse(BaseModel):
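(New file; its name was not captured on this page — presumably Dockerfile.dependencies, the file passed to `docker build -f` in deploy.py.)
Lines changed: 11 additions & 0 deletions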
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
# Builder image for packaging Python requirements as native dependencies.
# Based on the EMR on EKS Spark image (emr-7.3.0); presumably chosen so that
# packages are built against the same platform they will run on — confirm.
FROM public.ecr.aws/emr-on-eks/spark/emr-7.3.0:latest

# Run the remaining build steps, and the container's command, as root.
USER root

# venv-pack is the tool the build script uses to archive the virtualenv.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3.11 install --no-cache-dir venv-pack

# WORKDIR creates /workspace if it does not exist, so no separate mkdir
# layer is needed.
WORKDIR /workspace

# Default command. The script is not copied into the image, so it must be
# present in /workspace at run time (e.g. via a bind mount).
CMD ["./build_native_dependencies.sh"]
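(New file; its name was not captured on this page — presumably build_native_dependencies.sh, the script deploy.py copies into the build directory.)
Lines changed: 10 additions & 0 deletions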
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Description: build native dependencies.
#
# Installs the packages listed in ./requirements.txt into a fresh virtual
# environment (./.venv) and packs that environment into
# ./native_dependencies.tar.gz using venv-pack.
#
# Expects to be run from the directory that contains requirements.txt.

# Stop at the first failing command. Without this, a failed
# `pip install -r requirements.txt` would still be followed by venv-pack,
# producing an incomplete archive while the script exits successfully.
set -eo pipefail

# Ensure the venv-pack tool is available to the python3.11 installation.
pip3.11 install venv-pack

# --copies: copy files into the venv instead of symlinking them.
python3.11 -m venv --copies .venv
source .venv/bin/activate
pip install -r requirements.txt

# -o: path of the archive to write; -f: overwrite it if it already exists.
venv-pack -o native_dependencies.tar.gz -f

0 commit comments

Comments
 (0)