
Commit 2baa80d

Merge pull request #8 from forcedotcom/sdkVersion-config-2
Refactor config.json related parts of scan.py and deploy.py
2 parents 90a9799 + 7351d5d commit 2baa80d

File tree

7 files changed: +276 additions, -151 deletions


src/datacustomcode/cli.py

Lines changed: 13 additions & 1 deletion
@@ -101,13 +101,25 @@ def deploy(profile: str, path: str, name: str, version: str, description: str):
 @cli.command()
 @click.argument("directory", default=".")
 def init(directory: str):
+    from datacustomcode.scan import dc_config_json_from_file
     from datacustomcode.template import copy_template
 
     click.echo("Copying template to " + click.style(directory, fg="blue", bold=True))
     copy_template(directory)
+    entrypoint_path = os.path.join(directory, "payload", "entrypoint.py")
+    config_location = os.path.join(os.path.dirname(entrypoint_path), "config.json")
+    config_json = dc_config_json_from_file(entrypoint_path)
+    with open(config_location, "w") as f:
+        json.dump(config_json, f, indent=2)
+
     click.echo(
         "Start developing by updating the code in "
-        + click.style(f"{directory}/payload/entrypoint.py", fg="blue", bold=True)
+        + click.style(entrypoint_path, fg="blue", bold=True)
+    )
+    click.echo(
+        "You can run "
+        + click.style(f"datacustomcode scan {entrypoint_path}", fg="blue", bold=True)
+        + " to automatically update config.json when you make changes to your code"
     )
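
Note (illustration only, not part of the diff): given the stock template further down, running datacustomcode init my_project now leaves two files in place: my_project/payload/entrypoint.py (the copied template) and my_project/payload/config.json (generated by dc_config_json_from_file). The directory name and the "0.1.0" version are placeholder assumptions; the generated config.json would read roughly:

    {
      "sdkVersion": "0.1.0",
      "entryPoint": "entrypoint.py",
      "dataspace": "default",
      "permissions": {
        "read": {"dlo": ["Account_Home__dll"]},
        "write": {"dlo": ["Account_Home_copy__dll"]}
      }
    }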

src/datacustomcode/deploy.py

Lines changed: 57 additions & 80 deletions
@@ -31,12 +31,11 @@
 )
 
 from loguru import logger
+import pydantic
 from pydantic import BaseModel
 import requests
 
 from datacustomcode.cmd import cmd_output
-from datacustomcode.scan import scan_file
-from datacustomcode.version import get_version
 
 if TYPE_CHECKING:
     from datacustomcode.credentials import Credentials

@@ -79,8 +78,10 @@ def _make_api_call(
     logger.debug(f"Request params: {kwargs}")
 
     response = requests.request(method=method, url=url, headers=headers, **kwargs)
-    response.raise_for_status()
     json_response = response.json()
+    if response.status_code >= 400:
+        logger.debug(f"Error Response: {json_response}")
+        response.raise_for_status()
     assert isinstance(
         json_response, dict
     ), f"Unexpected response type: {type(json_response)}"

@@ -225,89 +226,62 @@ def wait_for_deployment(
             callback(status)
         if status == "Deployed":
             logger.debug(
-                "Deployment completed, Elapsed time: {time.time() - start_time}"
+                f"Deployment completed.\nElapsed time: {time.time() - start_time}"
             )
             break
         time.sleep(1)
 
 
 DATA_TRANSFORM_REQUEST_TEMPLATE: dict[str, Any] = {
-    "metadata": {
-        "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v8.json",
-        "dbt_version": "1.4.6",
-        "generated_at": "2023-04-25T18:54:11.375589Z",
-        "invocation_id": "d6c68c69-533a-4d54-861e-1493d6cd8092",
-        "env": {},
-        "project_id": "jaffle_shop",
-        "user_id": "1ca8403c-a1a5-43af-8b88-9265e948b9d2",
-        "send_anonymous_usage_stats": True,
-        "adapter_type": "spark",
-    },
-    "nodes": {
-        "model.dcexample.dim_listings_w_hosts": {
-            "name": "dim_listings_w_hosts",
-            "resource_type": "model",
-            "relation_name": "{OUTPUT_DLO}",
-            "config": {"materialized": "table"},
-            "compiled_code": "",
-            "depends_on": {"nodes": []},
-        }
-    },
-    "sources": {
-        "source.dcexample.listings": {
-            "name": "listings",
-            "resource_type": "source",
-            "relation_name": "{INPUT_DLO}",
-            "identifier": "{INPUT_DLO}",
-        }
-    },
+    "nodes": {},
+    "sources": {},
     "macros": {
-        "macro.dcexample.byoc": {
-            "name": "byoc_example",
-            "resource_type": "macro",
-            "path": "",
-            "original_file_path": "",
-            "unique_id": "unique id",
-            "macro_sql": "",
-            "supported_languages": None,
+        "macro.byoc": {
             "arguments": [{"name": "{SCRIPT_NAME}", "type": "BYOC_SCRIPT"}],
         }
     },
 }
 
 
 class DataTransformConfig(BaseModel):
-    input: Union[str, list[str]]
-    output: Union[str, list[str]]
+    sdkVersion: str
+    entryPoint: str
+    dataspace: str
+    permissions: Permissions
 
 
-DATA_TRANSFORM_CONFIG_TEMPLATE: dict[str, Any] = {
-    "entryPoint": "entrypoint.py",
-    "dataspace": "default",
-    "permissions": {"read": {"dlo": ""}, "write": {"dlo": ""}},
-    "sdkVersion": get_version(),
-}
+class Permissions(BaseModel):
+    read: Union[DloPermission]
+    write: Union[DloPermission]
 
 
-def get_data_transform_config(directory: str) -> DataTransformConfig:
-    """Get the data transform config from the entrypoint.py file."""
-    entrypoint_file = os.path.join(directory, "entrypoint.py")
-    data_access_layer_calls = scan_file(entrypoint_file)
-    input_ = data_access_layer_calls.input_str
-    output = data_access_layer_calls.output_str
-    return DataTransformConfig(input=input_, output=output)
+class DloPermission(BaseModel):
+    dlo: list[str]
 
 
-def create_data_transform_config(directory: str) -> None:
-    """Create a data transform config.json file in the directory."""
-    data_transform_config = get_data_transform_config(directory)
-    request_hydrated = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
-    request_hydrated["permissions"]["read"]["dlo"] = data_transform_config.input
-    request_hydrated["permissions"]["write"]["dlo"] = data_transform_config.output
-    logger.debug(f"Creating data transform config in {directory}")
-    json.dump(
-        request_hydrated, open(os.path.join(directory, "config.json"), "w"), indent=4
-    )
+def get_data_transform_config(directory: str) -> DataTransformConfig:
+    """Get the data transform config from the config.json file."""
+    config_path = os.path.join(directory, "config.json")
+    try:
+        with open(config_path, "r") as f:
+            config = json.loads(f.read())
+        return DataTransformConfig(**config)
+    except FileNotFoundError as err:
+        raise FileNotFoundError(f"config.json not found at {config_path}") from err
+    except json.JSONDecodeError as err:
+        raise ValueError(f"config.json at {config_path} is not valid JSON") from err
+    except pydantic.ValidationError as err:
+        missing_fields = [str(err["loc"][0]) for err in err.errors()]
+        raise ValueError(
+            f"config.json at {config_path} is missing required "
+            f"fields: {', '.join(missing_fields)}"
+        ) from err
+
+
+def verify_data_transform_config(directory: str) -> None:
+    """Verify the data transform config.json contents."""
+    get_data_transform_config(directory)
+    logger.debug(f"Verified data transform config in {directory}")
 
 
 def create_data_transform(

@@ -319,28 +293,31 @@ def create_data_transform(
     script_name = metadata.name
     data_transform_config = get_data_transform_config(directory)
     request_hydrated = DATA_TRANSFORM_REQUEST_TEMPLATE.copy()
-    request_hydrated["nodes"]["model.dcexample.dim_listings_w_hosts"][
-        "relation_name"
-    ] = data_transform_config.input
-    request_hydrated["sources"]["source.dcexample.listings"][
-        "relation_name"
-    ] = data_transform_config.output
-    request_hydrated["sources"]["source.dcexample.listings"][
-        "identifier"
-    ] = data_transform_config.output
-    request_hydrated["macros"]["macro.dcexample.byoc"]["arguments"][0][
-        "name"
-    ] = script_name
+
+    # Add nodes for each write DLO
+    for i, dlo in enumerate(data_transform_config.permissions.write.dlo, 1):
+        request_hydrated["nodes"][f"node{i}"] = {
+            "relation_name": dlo,
+            "config": {"materialized": "table"},
+            "compiled_code": "",
+        }
+
+    # Add sources for each read DLO
+    for i, dlo in enumerate(data_transform_config.permissions.read.dlo, 1):
+        request_hydrated["sources"][f"source{i}"] = {"relation_name": dlo}
+
+    request_hydrated["macros"]["macro.byoc"]["arguments"][0]["name"] = script_name
 
     body = {
         "definition": {
-            "type": "DBT",
+            "type": "DCSQL",
            "manifest": request_hydrated,
            "version": "56.0",
        },
        "label": f"{metadata.name}",
        "name": f"{metadata.name}",
        "type": "BATCH",
+        "dataSpaceName": data_transform_config.dataspace,
     }
 
     url = _join_strip_url(access_token.instance_url, DATA_TRANSFORMS_PATH)

@@ -359,7 +336,7 @@ def deploy_full(
 
     # prepare payload
     prepare_dependency_archive(directory)
-    create_data_transform_config(directory)
+    verify_data_transform_config(directory)
 
     # create deployment and upload payload
     deployment = create_deployment(access_token, metadata)
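
Note (a standalone sketch, not part of the diff): the new models turn config.json validation into a pydantic check. The field values below are hypothetical; the model definitions mirror the ones added above, written in dependency order:

    import pydantic
    from pydantic import BaseModel


    class DloPermission(BaseModel):
        dlo: list[str]


    class Permissions(BaseModel):
        read: DloPermission
        write: DloPermission


    class DataTransformConfig(BaseModel):
        sdkVersion: str
        entryPoint: str
        dataspace: str
        permissions: Permissions


    # A well-formed config.json payload validates cleanly.
    DataTransformConfig(
        sdkVersion="0.1.0",
        entryPoint="entrypoint.py",
        dataspace="default",
        permissions={"read": {"dlo": ["in__dll"]}, "write": {"dlo": ["out__dll"]}},
    )

    # Missing keys raise pydantic.ValidationError, which
    # get_data_transform_config converts into a readable ValueError.
    try:
        DataTransformConfig(entryPoint="entrypoint.py")
    except pydantic.ValidationError as err:
        missing = [str(e["loc"][0]) for e in err.errors()]
        print(missing)  # ['sdkVersion', 'dataspace', 'permissions']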

src/datacustomcode/scan.py

Lines changed: 21 additions & 12 deletions
@@ -16,15 +16,27 @@
 
 import ast
 from typing import (
+    Any,
     Dict,
-    List,
     Union,
 )
 
 import pydantic
 
+from datacustomcode.version import get_version
+
 DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"]
 
+DATA_TRANSFORM_CONFIG_TEMPLATE = {
+    "sdkVersion": get_version(),
+    "entryPoint": "",
+    "dataspace": "default",
+    "permissions": {
+        "read": {},
+        "write": {},
+    },
+}
+
 
 class DataAccessLayerCalls(pydantic.BaseModel):
     read_dlo: frozenset[str]

@@ -129,25 +141,22 @@ def scan_file(file_path: str) -> DataAccessLayerCalls:
     return visitor.found()
 
 
-def dc_config_json_from_file(file_path: str) -> dict:
+def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
     """Create a Data Cloud Custom Code config JSON from a script."""
     output = scan_file(file_path)
-    read = {}
+    config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
+    config["entryPoint"] = file_path.rpartition("/")[-1]
+
+    read: dict[str, list[str]] = {}
     if output.read_dlo:
         read["dlo"] = list(output.read_dlo)
     else:
         read["dmo"] = list(output.read_dmo)
-    write = {}
+    write: dict[str, list[str]] = {}
     if output.write_to_dlo:
         write["dlo"] = list(output.write_to_dlo)
     else:
         write["dmo"] = list(output.write_to_dmo)
-    config: Dict[str, Union[str, Dict[str, Dict[str, List[str]]]]] = {
-        "entryPoint": file_path.rpartition("/")[-1],
-        "dataspace": "default",
-        "permissions": {
-            "read": read,
-            "write": write,
-        },
-    }
+
+    config["permissions"] = {"read": read, "write": write}
     return config
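
Note (usage sketch with hypothetical names): dc_config_json_from_file keys the permissions block by "dlo" or "dmo" depending on which of the DATA_ACCESS_METHODS the scanned script calls. A script that only touches DMOs would, under that branching, produce a "dmo" block instead:

    import textwrap

    from datacustomcode.scan import dc_config_json_from_file

    # Hypothetical script that reads and writes DMOs rather than DLOs.
    script = textwrap.dedent(
        """
        from datacustomcode.client import Client

        def main():
            client = Client()
            df = client.read_dmo("Some_DMO__dlm")
            client.write_to_dmo("Other_DMO__dlm", df)
        """
    )
    with open("dmo_script.py", "w") as f:
        f.write(script)

    config = dc_config_json_from_file("dmo_script.py")
    print(config["permissions"])
    # expected: {'read': {'dmo': ['Some_DMO__dlm']}, 'write': {'dmo': ['Other_DMO__dlm']}}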

src/datacustomcode/templates/payload/entrypoint.py

Lines changed: 17 additions & 2 deletions
@@ -1,9 +1,24 @@
+from pyspark.sql.functions import col, upper
+
 from datacustomcode.client import Client
+from datacustomcode.io.writer.base import WriteMode
 
 
 def main():
-    client = Client()  # noqa: F841
-    # TODO: Add your custom code here
+    client = Client()
+
+    df = client.read_dlo("Account_Home__dll")
+
+    # Perform transformations on the DataFrame
+    df_upper1 = df.withColumn("Description__c", upper(col("Description__c")))
+
+    # Drop specific columns related to relationships
+    df_upper1 = df_upper1.drop("KQ_ParentId__c")
+    df_upper1 = df_upper1.drop("KQ_Id__c")
+
+    # Save the transformed DataFrame
+    dlo_name = "Account_Home_copy__dll"
+    client.write_to_dlo(dlo_name, df_upper1, write_mode=WriteMode.APPEND)
 
 
 if __name__ == "__main__":
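
Note (sketch only): since PySpark's DataFrame.drop accepts multiple column names, the template's transform could equivalently be written as one chained expression, with df being the DataFrame returned by client.read_dlo:

    df_out = df.withColumn("Description__c", upper(col("Description__c"))).drop(
        "KQ_ParentId__c", "KQ_Id__c"
    )
    client.write_to_dlo("Account_Home_copy__dll", df_out, write_mode=WriteMode.APPEND)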

tests/test_cli.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+import json
+import os
+from unittest.mock import mock_open, patch
+
+from click.testing import CliRunner
+
+from datacustomcode.cli import init
+
+
+class TestInit:
+    @patch("datacustomcode.template.copy_template")
+    @patch("datacustomcode.scan.dc_config_json_from_file")
+    @patch("builtins.open", new_callable=mock_open)
+    def test_init_command(self, mock_file, mock_scan, mock_copy):
+        """Test init command."""
+        mock_scan.return_value = {
+            "sdkVersion": "1.0.0",
+            "entryPoint": "entrypoint.py",
+            "dataspace": "default",
+            "permissions": {
+                "read": {"dlo": ["input_dlo"]},
+                "write": {"dlo": ["output_dlo"]},
+            },
+        }
+
+        runner = CliRunner()
+        with runner.isolated_filesystem():
+            # Create test directory structure
+            os.makedirs(os.path.join("test_dir", "payload"), exist_ok=True)
+
+            result = runner.invoke(init, ["test_dir"])
+
+            assert result.exit_code == 0
+            mock_copy.assert_called_once_with("test_dir")
+            mock_scan.assert_called_once_with(
+                os.path.join("test_dir", "payload", "entrypoint.py")
+            )
+
+            # Verify the config.json was written with the correct content
+            mock_file.assert_any_call(
+                os.path.join("test_dir", "payload", "config.json"), "w"
+            )
+
+            # Get all write calls and join them to get the complete written content
+            written_content = "".join(
+                call.args[0] for call in mock_file().write.call_args_list
+            )
+            expected_content = json.dumps(mock_scan.return_value, indent=2)
+            assert written_content == expected_content
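
Note on the assertion strategy: json.dump streams its output through many small write() calls on the mocked file handle, so the test reassembles the complete document by joining every call's argument before comparing it against json.dumps(mock_scan.return_value, indent=2).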
