
Commit 7414617

updated to add scan & zip changes

1 parent 5cdb01f
File tree

4 files changed: +290 -9 lines changed

src/datacustomcode/cli.py

Lines changed: 29 additions & 0 deletions
@@ -68,6 +68,35 @@ def configure(
         login_url=login_url,
     ).update_ini(profile=profile)
 
+@cli.command()
+@click.option("--profile", default="default")
+@click.option("--path", default="payload")
+@click.option("--name", default="test_pkg")
+@click.option("--version", default="0.0.1")
+@click.option("--description", default="Custom Data Transform Code")
+def zip(profile: str, path: str, name: str, version: str, description: str):
+    from datacustomcode.credentials import Credentials
+    from datacustomcode.deploy import TransformationJobMetadata, zip, zip_and_upload_directory
+
+    logger.debug("Zipping project")
+
+    metadata = TransformationJobMetadata(
+        name=name,
+        version=version,
+        description=description,
+    )
+    try:
+        credentials = Credentials.from_ini(profile=profile)
+    except KeyError:
+        click.secho(
+            f"Error: Profile {profile} not found in credentials.ini. "
+            "Run `datacustomcode configure` to create a credentials profile.",
+            fg="red",
+        )
+        raise click.Abort() from None
+    zip(path, metadata, credentials, name)
+
+
 @cli.command()
 @click.option("--profile", default="default")

src/datacustomcode/deploy.py

Lines changed: 81 additions & 9 deletions
@@ -169,23 +169,41 @@ def prepare_dependency_archive(directory: str) -> None:
     archive_file = os.path.join(archives_dir, DEPENDENCIES_ARCHIVE_NAME)
     with tarfile.open(archive_file, "w:gz") as tar:
         for file in os.listdir(temp_dir):
+            # Exclude requirements.txt from the archive
+            if file == "requirements.txt":
+                continue
             tar.add(os.path.join(temp_dir, file), arcname=file)
 
     logger.debug(f"Dependencies downloaded and archived to {archive_file}")
 
 
-def zip_and_upload_directory(directory: str, file_upload_url: str) -> None:
-    file_upload_url = unescape(file_upload_url)
+def zip_and_upload_directory(directory: str, name: str) -> None:
+    # file_upload_url = unescape(file_upload_url)
 
     logger.debug(f"Zipping directory... {directory}")
-    shutil.make_archive(ZIP_FILE_NAME.rstrip(".zip"), "zip", directory)
 
-    logger.debug(f"Uploading deployment to {file_upload_url}")
-    with open(ZIP_FILE_NAME, "rb") as zip_file:
-        response = requests.put(
-            file_upload_url, data=zip_file, headers={"Content-Type": "application/zip"}
-        )
-        response.raise_for_status()
+    # Create a zip file excluding .DS_Store files
+    import zipfile
+
+    zip_filename = f"{name}.zip"
+    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+        for root, dirs, files in os.walk(directory):
+            # Skip .DS_Store files when adding to zip
+            for file in files:
+                if file != '.DS_Store':
+                    file_path = os.path.join(root, file)
+                    # Preserve relative path structure in the zip file
+                    arcname = os.path.relpath(file_path, directory)
+                    zipf.write(file_path, arcname)
+
+    logger.debug(f"Created zip file: {zip_filename} (excluding .DS_Store files)")
+
+    # logger.debug(f"Uploading deployment to {file_upload_url}")
+    # with open(ZIP_FILE_NAME, "rb") as zip_file:
+    #     response = requests.put(
+    #         file_upload_url, data=zip_file, headers={"Content-Type": "application/zip"}
+    #     )
+    #     response.raise_for_status()
 
 
 class DeploymentsResponse(BaseModel):
@@ -324,6 +342,60 @@ def create_data_transform(
     response = _make_api_call(url, "POST", token=access_token.access_token, json=body)
     return response
 
+def has_nonempty_requirements_file(directory: str) -> bool:
+    """
+    Check whether a requirements.txt exists in the parent of the given
+    directory and has at least one non-comment line.
+
+    Args:
+        directory (str): The directory whose parent is checked for requirements.txt.
+
+    Returns:
+        bool: True if requirements.txt exists and has a non-comment line, False otherwise.
+    """
+    # Look for requirements.txt in the parent directory of the given directory
+    requirements_path = os.path.join(os.path.dirname(directory), "requirements.txt")
+    logger.debug(f"Checking for requirements file at {requirements_path}")
+
+    try:
+        if os.path.isfile(requirements_path):
+            with open(requirements_path, "r", encoding="utf-8") as f:
+                for line in f:
+                    # Non-empty if any line is not a comment (ignoring leading whitespace)
+                    if line.strip() and not line.lstrip().startswith('#'):
+                        return True
+    except Exception as e:
+        logger.error(f"Error reading requirements.txt: {e}")
+    return False
+
+
+def zip(
+    directory: str,
+    metadata: TransformationJobMetadata,
+    credentials: Credentials,
+    name: str,
+    callback=None,
+) -> AccessTokenResponse:
+    """Zip a data transform project directory for the DataCloud."""
+    access_token = _retrieve_access_token(credentials)
+
+    # prepare payload only if requirements.txt is non-empty
+    if has_nonempty_requirements_file(directory):
+        prepare_dependency_archive(directory)
+    else:
+        logger.info(
+            f"Skipping dependency archive: requirements.txt is missing or empty in {directory}"
+        )
+    # create_data_transform_config(directory)
+
+    # create deployment and upload payload
+    # deployment = create_deployment(access_token, metadata)
+    zip_and_upload_directory(directory, name)
+    # , deployment.fileUploadUrl)
+    # wait_for_deployment(access_token, metadata, callback)
+
+    # create data transform
+    # create_data_transform(directory, access_token, metadata)
+    return access_token
+
 
 def deploy_full(
     directory: str,
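
For reference, the new `zip` entry point can also be driven directly from Python rather than through the CLI. A minimal sketch, assuming a configured `default` profile and a payload directory named `payload` (both illustrative):

from datacustomcode.credentials import Credentials
from datacustomcode.deploy import TransformationJobMetadata, zip

# Assumes `datacustomcode configure` has already written a "default"
# profile; name/version/description mirror the CLI defaults above.
credentials = Credentials.from_ini(profile="default")
metadata = TransformationJobMetadata(
    name="test_pkg",
    version="0.0.1",
    description="Custom Data Transform Code",
)

# Builds the dependency archive only when a requirements.txt in the payload
# directory's parent has a non-comment line, then writes test_pkg.zip
# (excluding .DS_Store files). The upload and deployment steps inside zip()
# are currently commented out.
zip("payload", metadata, credentials, "test_pkg")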

src/datacustomcode/scan.py

Lines changed: 94 additions & 0 deletions
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import annotations
+import os
+from typing import Set
 
 import ast
 from typing import (
@@ -131,6 +133,98 @@ def found(self) -> DataAccessLayerCalls:
     )
 
 
+class ImportVisitor(ast.NodeVisitor):
+    """AST visitor that extracts external package imports from Python code."""
+
+    # Standard library modules that should be excluded from requirements
+    STANDARD_LIBS = {
+        "abc", "argparse", "ast", "asyncio", "base64", "collections", "configparser",
+        "contextlib", "copy", "csv", "datetime", "enum", "functools", "glob", "hashlib",
+        "http", "importlib", "inspect", "io", "itertools", "json", "logging", "math",
+        "os", "pathlib", "pickle", "random", "re", "shutil", "site", "socket", "sqlite3",
+        "string", "subprocess", "sys", "tempfile", "threading", "time", "traceback",
+        "typing", "uuid", "warnings", "xml", "zipfile",
+    }
+
+    # Additional packages to exclude from requirements.txt
+    EXCLUDED_PACKAGES = {
+        "datacustomcode",  # Internal package
+        "pyspark",  # Provided by the runtime environment
+    }
+
+    def __init__(self) -> None:
+        self.imports: Set[str] = set()
+
+    def visit_Import(self, node: ast.Import) -> None:
+        """Visit an import statement (e.g., import os, sys)."""
+        for name in node.names:
+            # Get the top-level package name
+            package = name.name.split('.')[0]
+            if (package not in self.STANDARD_LIBS and
+                    package not in self.EXCLUDED_PACKAGES and
+                    not package.startswith('_')):
+                self.imports.add(package)
+        self.generic_visit(node)
+
+    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
+        """Visit a from-import statement (e.g., from os import path)."""
+        if node.module is not None:
+            # Get the top-level package name
+            package = node.module.split('.')[0]
+            if (package not in self.STANDARD_LIBS and
+                    package not in self.EXCLUDED_PACKAGES and
+                    not package.startswith('_')):
+                self.imports.add(package)
+        self.generic_visit(node)
+
+
+def scan_file_for_imports(file_path: str) -> Set[str]:
+    """Scan a Python file for external package imports."""
+    with open(file_path, "r") as f:
+        code = f.read()
+    tree = ast.parse(code)
+    visitor = ImportVisitor()
+    visitor.visit(tree)
+    return visitor.imports
+
+
+def write_requirements_file(file_path: str, output_dir: str | None = None) -> str:
+    """
+    Scan a Python file for imports and write them to requirements.txt.
+
+    Args:
+        file_path: Path to the Python file to scan.
+        output_dir: Directory where requirements.txt should be created
+            (defaults to the parent of the file's directory).
+
+    Returns:
+        Path to the generated requirements.txt file.
+    """
+    imports = scan_file_for_imports(file_path)
+
+    if not output_dir:
+        # Use the parent directory rather than the same directory as the file
+        file_dir = os.path.dirname(file_path)
+        output_dir = os.path.dirname(file_dir) if file_dir else "."
+
+    requirements_path = os.path.join(output_dir, "requirements.txt")
+
+    # If the file exists, read existing requirements and merge with new ones
+    existing_requirements = set()
+    if os.path.exists(requirements_path):
+        with open(requirements_path, "r") as f:
+            existing_requirements = {line.strip() for line in f if line.strip()}
+
+    # Merge existing requirements with newly discovered ones
+    all_requirements = existing_requirements.union(imports)
+
+    # Write the combined requirements
+    with open(requirements_path, "w") as f:
+        for package in sorted(all_requirements):
+            f.write(f"{package}\n")
+
+    return requirements_path
+
+
 def scan_file(file_path: str) -> DataAccessLayerCalls:
     """Scan a single Python file for Client read/write method calls."""
     with open(file_path, "r") as f:
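
As a quick check, the scan helpers can be run on their own. A minimal sketch, assuming a hypothetical entrypoint at payload/test_pkg/entrypoint.py that imports pandas (both the path and the import are illustrative):

from datacustomcode.scan import scan_file_for_imports, write_requirements_file

# ImportVisitor filters out stdlib modules, datacustomcode, and pyspark, so
# only genuinely external imports (e.g., pandas) survive.
imports = scan_file_for_imports("payload/test_pkg/entrypoint.py")
print(imports)  # e.g., {'pandas'}

# Writes (or merges into) requirements.txt in the parent of the file's
# directory (here payload/requirements.txt) and returns its path.
path = write_requirements_file("payload/test_pkg/entrypoint.py")
print(path)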
Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "253d95db-fdc6-4bbb-b75c-20b46639f2d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datacustomcode.client import Client\n",
+    "from datacustomcode.io.writer.base import WriteMode\n",
+    "from pyspark.sql.functions import col, upper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "debdfc62-489b-4ca8-af1d-56c60c0d32e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = Client()\n",
+    "\n",
+    "df = client.read_dlo(\"Account_Home__dll\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d96ad7c8-f5ba-44a7-a2ad-8597beb20cf4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Perform transformations on the DataFrame\n",
+    "df_upper1 = df.withColumn(\"Description__c\", upper(col(\"Description__c\")))\n",
+    "\n",
+    "# Drop specific columns related to relationships\n",
+    "df_upper1 = df_upper1.drop(\"KQ_ParentId__c\")\n",
+    "df_upper1 = df_upper1.drop(\"KQ_Id__c\")\n",
+    "\n",
+    "df_upper1.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f823139-3a22-487f-a4a1-966c6269a708",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the transformed DataFrame\n",
+    "dlo_name = 'Account_Home_copy__dll'\n",
+    "client.write_to_dlo(dlo_name, df_upper1, write_mode=WriteMode.APPEND)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "425f383b-09b4-45ee-957c-f215d7a2ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
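
For reference, a sketch of the notebook's logic as a plain entrypoint script, the form the scan helpers operate on; the path payload/test_pkg/entrypoint.py is illustrative:

from pyspark.sql.functions import col, upper

from datacustomcode.client import Client
from datacustomcode.io.writer.base import WriteMode

client = Client()
df = client.read_dlo("Account_Home__dll")

# Upper-case the description and drop the relationship key columns
df_upper1 = df.withColumn("Description__c", upper(col("Description__c")))
df_upper1 = df_upper1.drop("KQ_ParentId__c").drop("KQ_Id__c")

# Append the result to the copy DLO
client.write_to_dlo("Account_Home_copy__dll", df_upper1, write_mode=WriteMode.APPEND)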
