Commit 83b4af1

Zip and lint cleanup
1 parent fefa2a3 commit 83b4af1

File tree

4 files changed: +122 -117 lines changed


src/datacustomcode/cli.py

Lines changed: 8 additions & 27 deletions
@@ -68,34 +68,14 @@ def configure(
         login_url=login_url,
     ).update_ini(profile=profile)

+
 @cli.command()
-@click.option("--profile", default="default")
-@click.option("--path", default="payload")
-@click.option("--name", default="test_pkg")
-@click.option("--version", default="0.0.1")
-@click.option("--description", default="Custom Data Transform Code")
-def zip(profile: str, path: str, name: str, version: str, description: str):
-    from datacustomcode.credentials import Credentials
-    from datacustomcode.deploy import TransformationJobMetadata, zip, zip_and_upload_directory
+@click.argument("path", default="payload")
+def zip(path: str):
+    from datacustomcode.deploy import zip

     logger.debug("Zipping project")
-
-    metadata = TransformationJobMetadata(
-        name=name,
-        version=version,
-        description=description,
-    )
-    try:
-        credentials = Credentials.from_ini(profile=profile)
-    except KeyError:
-        click.secho(
-            f"Error: Profile {profile} not found in credentials.ini. "
-            "Run `datacustomcode configure` to create a credentialsprofile.",
-            fg="red",
-        )
-        raise click.Abort() from None
-    zip(path, metadata, credentials, name)
-
+    zip(path)


 @cli.command()
@@ -156,11 +136,12 @@ def init(directory: str):
 @click.argument("filename")
 @click.option("--config")
 @click.option("--dry-run", is_flag=True)
-@click.option("--no-requirements", is_flag=True, help="Skip generating requirements.txt file")
+@click.option(
+    "--no-requirements", is_flag=True, help="Skip generating requirements.txt file"
+)
 def scan(filename: str, config: str, dry_run: bool, no_requirements: bool):
     from datacustomcode.scan import dc_config_json_from_file, write_requirements_file

-
     config_location = config or os.path.join(os.path.dirname(filename), "config.json")
     click.echo(
         "Dumping scan results to config file: "

src/datacustomcode/deploy.py

Lines changed: 38 additions & 55 deletions
@@ -177,35 +177,6 @@ def prepare_dependency_archive(directory: str) -> None:
     logger.debug(f"Dependencies downloaded and archived to {archive_file}")


-def zip_and_upload_directory(directory: str, name: str) -> None:
-    # file_upload_url = unescape(file_upload_url)
-
-    logger.debug(f"Zipping directory... {directory}")
-
-    # Create a zip file excluding .DS_Store files
-    import zipfile
-
-    zip_filename = f"{name}.zip"
-    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
-        for root, dirs, files in os.walk(directory):
-            # Skip .DS_Store files when adding to zip
-            for file in files:
-                if file != '.DS_Store':
-                    file_path = os.path.join(root, file)
-                    # Preserve relative path structure in the zip file
-                    arcname = os.path.relpath(file_path, directory)
-                    zipf.write(file_path, arcname)
-
-    logger.debug(f"Created zip file: {zip_filename} (excluding .DS_Store files)")
-
-    # logger.debug(f"Uploading deployment to {file_upload_url}")
-    # with open(ZIP_FILE_NAME, "rb") as zip_file:
-    #     response = requests.put(
-    #         file_upload_url, data=zip_file, headers={"Content-Type": "application/zip"}
-    #     )
-    #     response.raise_for_status()
-
-
 class DeploymentsResponse(BaseModel):
     deploymentStatus: str

@@ -342,59 +313,70 @@ def create_data_transform(
     response = _make_api_call(url, "POST", token=access_token.access_token, json=body)
     return response

+
 def has_nonempty_requirements_file(directory: str) -> bool:
     """
-    Check if requirements.txt exists in the given directory and has at least one non-comment line.
+    Check if requirements.txt exists in the given directory and has at least
+    one non-comment line.
     Args:
         directory (str): The directory to check for requirements.txt.
     Returns:
-        bool: True if requirements.txt exists and has a non-comment line, False otherwise.
+        bool: True if requirements.txt exists and has a non-comment line,
+            False otherwise.
     """
     # Look for requirements.txt in the parent directory of the given directory
     requirements_path = os.path.join(os.path.dirname(directory), "requirements.txt")
-    print(requirements_path)

     try:
         if os.path.isfile(requirements_path):
-            #print the contents of the file
-            with open(requirements_path, "r", encoding="utf-8") as f:
-                print(f.read())
            with open(requirements_path, "r", encoding="utf-8") as f:
                for line in f:
-                    # Consider non-empty if any line is not a comment (ignoring leading whitespace)
-                    if line.strip() and not line.lstrip().startswith('#'):
+                    # Consider non-empty if any line is not a comment (ignoring
+                    # leading whitespace)
+                    if line.strip() and not line.lstrip().startswith("#"):
                        return True
    except Exception as e:
        logger.error(f"Error reading requirements.txt: {e}")
    return False


+def upload_zip(file_upload_url: str) -> None:
+    file_upload_url = unescape(file_upload_url)
+    with open(ZIP_FILE_NAME, "rb") as zip_file:
+        response = requests.put(
+            file_upload_url, data=zip_file, headers={"Content-Type": "application/zip"}
+        )
+        response.raise_for_status()
+
+
 def zip(
     directory: str,
-    metadata: TransformationJobMetadata,
-    credentials: Credentials,
-    name: str,
-    callback=None,
-) -> AccessTokenResponse:
-    """Deploy a data transform in the DataCloud."""
-    access_token = _retrieve_access_token(credentials)
+):
+    # Create a zip file excluding .DS_Store files
+    import zipfile

     # prepare payload only if requirements.txt is non-empty
     if has_nonempty_requirements_file(directory):
         prepare_dependency_archive(directory)
     else:
-        logger.info(f"Skipping dependency archive: requirements.txt is missing or empty in {directory}")
-        # create_data_transform_config(directory)
+        logger.info(
+            f"Skipping dependency archive: requirements.txt is missing or empty "
+            f"in {directory}"
+        )

-    # create deployment and upload payload
-    # deployment = create_deployment(access_token, metadata)
-    zip_and_upload_directory(directory, name)
-    #, deployment.fileUploadUrl)
-    # wait_for_deployment(access_token, metadata, callback)
+    logger.debug(f"Zipping directory... {directory}")

-    # create data transform
-    # create_data_transform(directory, access_token, metadata)
-    return access_token
+    with zipfile.ZipFile(ZIP_FILE_NAME, "w", zipfile.ZIP_DEFLATED) as zipf:
+        for root, dirs, files in os.walk(directory):
+            # Skip .DS_Store files when adding to zip
+            for file in files:
+                if file != ".DS_Store":
+                    file_path = os.path.join(root, file)
+                    # Preserve relative path structure in the zip file
+                    arcname = os.path.relpath(file_path, directory)
+                    zipf.write(file_path, arcname)
+
+    logger.debug(f"Created zip file: {ZIP_FILE_NAME}")


 def deploy_full(
@@ -412,7 +394,8 @@ def deploy_full(

     # create deployment and upload payload
     deployment = create_deployment(access_token, metadata)
-    zip_and_upload_directory(directory, deployment.fileUploadUrl)
+    zip(directory)
+    upload_zip(deployment.fileUploadUrl)
     wait_for_deployment(access_token, metadata, callback)

     # create data transform
src/datacustomcode/scan.py

Lines changed: 65 additions & 24 deletions
@@ -13,13 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import annotations
-import os
-from typing import Union, Dict, List, FrozenSet, Set

 import ast
+import os
 from typing import (
     Any,
+    ClassVar,
     Dict,
+    Set,
     Union,
 )

@@ -137,17 +138,55 @@ class ImportVisitor(ast.NodeVisitor):
     """AST Visitor that extracts external package imports from Python code."""

     # Standard library modules that should be excluded from requirements
-    STANDARD_LIBS = {
-        "abc", "argparse", "ast", "asyncio", "base64", "collections", "configparser",
-        "contextlib", "copy", "csv", "datetime", "enum", "functools", "glob", "hashlib",
-        "http", "importlib", "inspect", "io", "itertools", "json", "logging", "math",
-        "os", "pathlib", "pickle", "random", "re", "shutil", "site", "socket", "sqlite3",
-        "string", "subprocess", "sys", "tempfile", "threading", "time", "traceback",
-        "typing", "uuid", "warnings", "xml", "zipfile"
+    STANDARD_LIBS: ClassVar[set[str]] = {
+        "abc",
+        "argparse",
+        "ast",
+        "asyncio",
+        "base64",
+        "collections",
+        "configparser",
+        "contextlib",
+        "copy",
+        "csv",
+        "datetime",
+        "enum",
+        "functools",
+        "glob",
+        "hashlib",
+        "http",
+        "importlib",
+        "inspect",
+        "io",
+        "itertools",
+        "json",
+        "logging",
+        "math",
+        "os",
+        "pathlib",
+        "pickle",
+        "random",
+        "re",
+        "shutil",
+        "site",
+        "socket",
+        "sqlite3",
+        "string",
+        "subprocess",
+        "sys",
+        "tempfile",
+        "threading",
+        "time",
+        "traceback",
+        "typing",
+        "uuid",
+        "warnings",
+        "xml",
+        "zipfile",
     }

     # Additional packages to exclude from requirements.txt
-    EXCLUDED_PACKAGES = {
+    EXCLUDED_PACKAGES: ClassVar[set[str]] = {
         "datacustomcode",  # Internal package
         "pyspark",  # Provided by the runtime environment
     }
@@ -159,21 +198,25 @@ def visit_Import(self, node: ast.Import) -> None:
         """Visit an import statement (e.g., import os, sys)."""
         for name in node.names:
             # Get the top-level package name
-            package = name.name.split('.')[0]
-            if (package not in self.STANDARD_LIBS and
-                package not in self.EXCLUDED_PACKAGES and
-                not package.startswith('_')):
+            package = name.name.split(".")[0]
+            if (
+                package not in self.STANDARD_LIBS
+                and package not in self.EXCLUDED_PACKAGES
+                and not package.startswith("_")
+            ):
                 self.imports.add(package)
         self.generic_visit(node)

     def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
         """Visit a from-import statement (e.g., from os import path)."""
         if node.module is not None:
             # Get the top-level package
-            package = node.module.split('.')[0]
-            if (package not in self.STANDARD_LIBS and
-                package not in self.EXCLUDED_PACKAGES and
-                not package.startswith('_')):
+            package = node.module.split(".")[0]
+            if (
+                package not in self.STANDARD_LIBS
+                and package not in self.EXCLUDED_PACKAGES
+                and not package.startswith("_")
+            ):
                 self.imports.add(package)
         self.generic_visit(node)

@@ -188,23 +231,21 @@ def scan_file_for_imports(file_path: str) -> Set[str]:
     return visitor.imports


-def write_requirements_file(file_path: str, output_dir: str = None) -> str:
+def write_requirements_file(file_path: str) -> str:
     """
     Scan a Python file for imports and write them to requirements.txt.

     Args:
         file_path: Path to the Python file to scan
-        output_dir: Directory where requirements.txt should be created (defaults to parent directory)

     Returns:
         Path to the generated requirements.txt file
     """
     imports = scan_file_for_imports(file_path)

-    if not output_dir:
-        # Use the parent directory rather than same directory as the file
-        file_dir = os.path.dirname(file_path)
-        output_dir = os.path.dirname(file_dir) if file_dir else "."
+    # Use the parent directory rather than same directory as the file
+    file_dir = os.path.dirname(file_path)
+    output_dir = os.path.dirname(file_dir) if file_dir else "."

     requirements_path = os.path.join(output_dir, "requirements.txt")
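
For context, the reformatted ImportVisitor is the engine behind write_requirements_file: parse the source with ast, keep the top-level package names that are neither standard library nor explicitly excluded, and write the remainder to a requirements.txt one level above the scanned file. Below is a compact self-contained sketch of that approach; the trimmed sets and the sample source string are illustrative, not the module's actual values.

# Minimal sketch of the AST-based import scan used by scan.py: collect
# top-level package names from a source string, filtering out stdlib and
# runtime-provided packages. The sets below are trimmed for illustration.
import ast

STANDARD_LIBS = {"os", "json", "typing"}           # abbreviated for the sketch
EXCLUDED_PACKAGES = {"datacustomcode", "pyspark"}  # internal / runtime-provided


class ImportCollector(ast.NodeVisitor):
    def __init__(self) -> None:
        self.imports: set[str] = set()

    def _add(self, dotted_name: str) -> None:
        # Keep only the top-level package, mirroring the split(".")[0] logic
        package = dotted_name.split(".")[0]
        if (
            package not in STANDARD_LIBS
            and package not in EXCLUDED_PACKAGES
            and not package.startswith("_")
        ):
            self.imports.add(package)

    def visit_Import(self, node: ast.Import) -> None:
        for alias in node.names:
            self._add(alias.name)
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        if node.module is not None:
            self._add(node.module)
        self.generic_visit(node)


source = "import os\nimport pandas as pd\nfrom pyspark.sql import functions\n"
collector = ImportCollector()
collector.visit(ast.parse(source))
print(sorted(collector.imports))  # expected: ['pandas']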
