diff --git a/MANIFEST.in b/MANIFEST.in index 3727525..3ba07ef 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,4 @@ include *.txt include parsely_raw_data/*.thrift +include parsely_raw_data/dbt/redshift/* +recursive-include parsely_raw_data *.sql diff --git a/dbt/__init__.py b/dbt/__init__.py deleted file mode 100644 index c396168..0000000 --- a/dbt/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import absolute_import diff --git a/dbt/redshift/__init__.py b/dbt/redshift/__init__.py deleted file mode 100644 index c7b1bc9..0000000 --- a/dbt/redshift/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import - -from .redshift_etl import migrate_from_s3_by_day diff --git a/dbt/requirements.txt b/dbt/requirements.txt deleted file mode 100644 index 5815d69..0000000 --- a/dbt/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -dbt==0.17.2 -python-dateutil==2.8.1 -PyYAML==5.3.1 -sqlalchemy==1.3.20 \ No newline at end of file diff --git a/parsely_raw_data/__init__.py b/parsely_raw_data/__init__.py index cb6daab..935436b 100644 --- a/parsely_raw_data/__init__.py +++ b/parsely_raw_data/__init__.py @@ -14,9 +14,9 @@ limitations under the License. """ -__version__ = "2.4.0" +__version__ = "2.4.1" -from . import bigquery, docgen, redshift, s3, samples, schema, stream, utils +from . import bigquery, docgen, redshift, s3, samples, schema, stream, utils, dbt __all__ = [ "bigquery", @@ -27,6 +27,7 @@ "schema", "stream", "utils", + "dbt" ] BOOLEAN_FIELDS = {"flags_is_amp"} diff --git a/parsely_raw_data/dbt/__init__.py b/parsely_raw_data/dbt/__init__.py new file mode 100644 index 0000000..e61b783 --- /dev/null +++ b/parsely_raw_data/dbt/__init__.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import + +from . import redshift as parsely_dbt_redshift + +__all__ = [ + "parsely_dbt_redshift" +] diff --git a/dbt/redshift/Parsely_DPL_DBT_etl_model.png b/parsely_raw_data/dbt/redshift/Parsely_DPL_DBT_etl_model.png similarity index 100% rename from dbt/redshift/Parsely_DPL_DBT_etl_model.png rename to parsely_raw_data/dbt/redshift/Parsely_DPL_DBT_etl_model.png diff --git a/dbt/redshift/README.md b/parsely_raw_data/dbt/redshift/README.md similarity index 100% rename from dbt/redshift/README.md rename to parsely_raw_data/dbt/redshift/README.md diff --git a/parsely_raw_data/dbt/redshift/__init__.py b/parsely_raw_data/dbt/redshift/__init__.py new file mode 100644 index 0000000..a104bea --- /dev/null +++ b/parsely_raw_data/dbt/redshift/__init__.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import + +from .redshift_etl import migrate_from_s3_by_day +from .settings import ( + DBT_PROFILE_LOCATION, + DBT_PROFILE_TARGET_NAME, + ETL_END_DATE, + ETL_KEEP_RAW_DATA, + ETL_START_DATE, + PARSELY_RAW_DATA_TABLE, + REDSHIFT_DATABASE, + REDSHIFT_HOST, + REDSHIFT_PASSWORD, + REDSHIFT_PORT, + REDSHIFT_USER, + S3_AWS_ACCESS_KEY_ID, + S3_AWS_SECRET_ACCESS_KEY, + S3_NETWORK_NAME, +) +from .settings import migrate_settings \ No newline at end of file diff --git a/dbt/redshift/dbt_project.yml b/parsely_raw_data/dbt/redshift/dbt_project.yml similarity index 97% rename from dbt/redshift/dbt_project.yml rename to parsely_raw_data/dbt/redshift/dbt_project.yml index 3e801a0..ae83020 100644 --- a/dbt/redshift/dbt_project.yml +++ b/parsely_raw_data/dbt/redshift/dbt_project.yml @@ -18,7 +18,7 @@ - "{% if var('etl:keep_rawdata') == true %} select 1 {% else %} truncate table {{\ \ target.schema }}.parsely_rawdata {% endif %}" - "truncate table {{var('parsely:events')}}" -"profile": "parsely-dwh" +"profile": "parsely_dwh" "source-paths": - "models" "target-path": "target" diff --git a/dbt/redshift/models/base/calendar.sql b/parsely_raw_data/dbt/redshift/models/base/calendar.sql similarity index 100% rename from dbt/redshift/models/base/calendar.sql rename to parsely_raw_data/dbt/redshift/models/base/calendar.sql diff --git a/dbt/redshift/models/base/parsely_all_events.sql b/parsely_raw_data/dbt/redshift/models/base/parsely_all_events.sql similarity index 100% rename from dbt/redshift/models/base/parsely_all_events.sql rename to parsely_raw_data/dbt/redshift/models/base/parsely_all_events.sql diff --git a/dbt/redshift/models/base/parsely_base_events.sql b/parsely_raw_data/dbt/redshift/models/base/parsely_base_events.sql similarity index 100% rename from dbt/redshift/models/base/parsely_base_events.sql rename to parsely_raw_data/dbt/redshift/models/base/parsely_base_events.sql diff --git a/dbt/redshift/models/base/parsely_parent_pageview_keys.sql b/parsely_raw_data/dbt/redshift/models/base/parsely_parent_pageview_keys.sql similarity index 100% rename from dbt/redshift/models/base/parsely_parent_pageview_keys.sql rename to parsely_raw_data/dbt/redshift/models/base/parsely_parent_pageview_keys.sql diff --git a/dbt/redshift/models/base/parsely_parent_videostart_keys.sql b/parsely_raw_data/dbt/redshift/models/base/parsely_parent_videostart_keys.sql similarity index 100% rename from dbt/redshift/models/base/parsely_parent_videostart_keys.sql rename to parsely_raw_data/dbt/redshift/models/base/parsely_parent_videostart_keys.sql diff --git a/dbt/redshift/models/base/parsely_rawdata.sql b/parsely_raw_data/dbt/redshift/models/base/parsely_rawdata.sql similarity index 100% rename from dbt/redshift/models/base/parsely_rawdata.sql rename to parsely_raw_data/dbt/redshift/models/base/parsely_rawdata.sql diff --git a/dbt/redshift/models/behavior_workflow/parsely_pageview_behavior_workflow.sql b/parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_pageview_behavior_workflow.sql similarity index 100% rename from dbt/redshift/models/behavior_workflow/parsely_pageview_behavior_workflow.sql rename to parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_pageview_behavior_workflow.sql diff --git a/dbt/redshift/models/behavior_workflow/parsely_pageview_engagedtime.sql b/parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_pageview_engagedtime.sql similarity index 100% rename from dbt/redshift/models/behavior_workflow/parsely_pageview_engagedtime.sql rename to parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_pageview_engagedtime.sql diff --git a/dbt/redshift/models/behavior_workflow/parsely_pageview_hanging_engagedtime.sql b/parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_pageview_hanging_engagedtime.sql similarity index 100% rename from dbt/redshift/models/behavior_workflow/parsely_pageview_hanging_engagedtime.sql rename to parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_pageview_hanging_engagedtime.sql diff --git a/dbt/redshift/models/behavior_workflow/parsely_videoview_behavior_workflow.sql b/parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_videoview_behavior_workflow.sql similarity index 100% rename from dbt/redshift/models/behavior_workflow/parsely_videoview_behavior_workflow.sql rename to parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_videoview_behavior_workflow.sql diff --git a/dbt/redshift/models/behavior_workflow/parsely_videoview_engagedtime.sql b/parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_videoview_engagedtime.sql similarity index 100% rename from dbt/redshift/models/behavior_workflow/parsely_videoview_engagedtime.sql rename to parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_videoview_engagedtime.sql diff --git a/dbt/redshift/models/behavior_workflow/parsely_videoview_hanging_engagedtime.sql b/parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_videoview_hanging_engagedtime.sql similarity index 100% rename from dbt/redshift/models/behavior_workflow/parsely_videoview_hanging_engagedtime.sql rename to parsely_raw_data/dbt/redshift/models/behavior_workflow/parsely_videoview_hanging_engagedtime.sql diff --git a/dbt/redshift/models/campaigns/parsely_campaigns.sql b/parsely_raw_data/dbt/redshift/models/campaigns/parsely_campaigns.sql similarity index 100% rename from dbt/redshift/models/campaigns/parsely_campaigns.sql rename to parsely_raw_data/dbt/redshift/models/campaigns/parsely_campaigns.sql diff --git a/dbt/redshift/models/content/parsely_post_content.sql b/parsely_raw_data/dbt/redshift/models/content/parsely_post_content.sql similarity index 100% rename from dbt/redshift/models/content/parsely_post_content.sql rename to parsely_raw_data/dbt/redshift/models/content/parsely_post_content.sql diff --git a/dbt/redshift/models/content/parsely_video_content.sql b/parsely_raw_data/dbt/redshift/models/content/parsely_video_content.sql similarity index 100% rename from dbt/redshift/models/content/parsely_video_content.sql rename to parsely_raw_data/dbt/redshift/models/content/parsely_video_content.sql diff --git a/dbt/redshift/models/custom/parsely_custom_events.sql b/parsely_raw_data/dbt/redshift/models/custom/parsely_custom_events.sql similarity index 100% rename from dbt/redshift/models/custom/parsely_custom_events.sql rename to parsely_raw_data/dbt/redshift/models/custom/parsely_custom_events.sql diff --git a/dbt/redshift/models/excluded_events/parsely_bot_traffic.sql b/parsely_raw_data/dbt/redshift/models/excluded_events/parsely_bot_traffic.sql similarity index 100% rename from dbt/redshift/models/excluded_events/parsely_bot_traffic.sql rename to parsely_raw_data/dbt/redshift/models/excluded_events/parsely_bot_traffic.sql diff --git a/dbt/redshift/models/excluded_events/parsely_errors.sql b/parsely_raw_data/dbt/redshift/models/excluded_events/parsely_errors.sql similarity index 100% rename from dbt/redshift/models/excluded_events/parsely_errors.sql rename to parsely_raw_data/dbt/redshift/models/excluded_events/parsely_errors.sql diff --git a/dbt/redshift/models/pageviews/parsely_incoming_pageviews.sql b/parsely_raw_data/dbt/redshift/models/pageviews/parsely_incoming_pageviews.sql similarity index 100% rename from dbt/redshift/models/pageviews/parsely_incoming_pageviews.sql rename to parsely_raw_data/dbt/redshift/models/pageviews/parsely_incoming_pageviews.sql diff --git a/dbt/redshift/models/pageviews/parsely_pageviews_sessionized.sql b/parsely_raw_data/dbt/redshift/models/pageviews/parsely_pageviews_sessionized.sql similarity index 100% rename from dbt/redshift/models/pageviews/parsely_pageviews_sessionized.sql rename to parsely_raw_data/dbt/redshift/models/pageviews/parsely_pageviews_sessionized.sql diff --git a/dbt/redshift/models/sessions/parsely_entry_exit_urls.sql b/parsely_raw_data/dbt/redshift/models/sessions/parsely_entry_exit_urls.sql similarity index 100% rename from dbt/redshift/models/sessions/parsely_entry_exit_urls.sql rename to parsely_raw_data/dbt/redshift/models/sessions/parsely_entry_exit_urls.sql diff --git a/dbt/redshift/models/sessions/parsely_incoming_sessions.sql b/parsely_raw_data/dbt/redshift/models/sessions/parsely_incoming_sessions.sql similarity index 100% rename from dbt/redshift/models/sessions/parsely_incoming_sessions.sql rename to parsely_raw_data/dbt/redshift/models/sessions/parsely_incoming_sessions.sql diff --git a/dbt/redshift/models/sessions/parsely_sessions.sql b/parsely_raw_data/dbt/redshift/models/sessions/parsely_sessions.sql similarity index 100% rename from dbt/redshift/models/sessions/parsely_sessions.sql rename to parsely_raw_data/dbt/redshift/models/sessions/parsely_sessions.sql diff --git a/dbt/redshift/models/users/parsely_incoming_users.sql b/parsely_raw_data/dbt/redshift/models/users/parsely_incoming_users.sql similarity index 100% rename from dbt/redshift/models/users/parsely_incoming_users.sql rename to parsely_raw_data/dbt/redshift/models/users/parsely_incoming_users.sql diff --git a/dbt/redshift/models/users/parsely_users.sql b/parsely_raw_data/dbt/redshift/models/users/parsely_users.sql similarity index 100% rename from dbt/redshift/models/users/parsely_users.sql rename to parsely_raw_data/dbt/redshift/models/users/parsely_users.sql diff --git a/dbt/redshift/models/videoviews/parsely_incoming_videoviews.sql b/parsely_raw_data/dbt/redshift/models/videoviews/parsely_incoming_videoviews.sql similarity index 100% rename from dbt/redshift/models/videoviews/parsely_incoming_videoviews.sql rename to parsely_raw_data/dbt/redshift/models/videoviews/parsely_incoming_videoviews.sql diff --git a/dbt/redshift/models/videoviews/parsely_videoviews_sessionized.sql b/parsely_raw_data/dbt/redshift/models/videoviews/parsely_videoviews_sessionized.sql similarity index 100% rename from dbt/redshift/models/videoviews/parsely_videoviews_sessionized.sql rename to parsely_raw_data/dbt/redshift/models/videoviews/parsely_videoviews_sessionized.sql diff --git a/dbt/redshift/redshift_etl.py b/parsely_raw_data/dbt/redshift/redshift_etl.py similarity index 89% rename from dbt/redshift/redshift_etl.py rename to parsely_raw_data/dbt/redshift/redshift_etl.py index 1c0d696..3c680f0 100644 --- a/dbt/redshift/redshift_etl.py +++ b/parsely_raw_data/dbt/redshift/redshift_etl.py @@ -1,13 +1,13 @@ from __future__ import absolute_import import logging -import os +import pkg_resources import psycopg2 import subprocess from dateutil import rrule from parsely_raw_data import redshift as parsely_redshift from parsely_raw_data import utils as parsely_utils -from dbt.redshift.settings.default import ( +from parsely_raw_data.dbt.redshift.settings import ( DBT_PROFILE_LOCATION, DBT_PROFILE_TARGET_NAME, ETL_END_DATE, @@ -23,7 +23,7 @@ S3_AWS_SECRET_ACCESS_KEY, S3_NETWORK_NAME, ) -from dbt.redshift.settings.merge_settings_yaml import migrate_settings +from parsely_raw_data.dbt.redshift.settings.merge_settings_yaml import migrate_settings SETTINGS_ARG_MAPPING = { 'table_name': PARSELY_RAW_DATA_TABLE, @@ -82,9 +82,10 @@ def migrate_from_s3_by_day(network=S3_NETWORK_NAME, secret_access_key=secret_access_key) # This runs dbt once all of the new data has been copied into the raw data table - dpl_wd = os.path.join(os.getcwd(), 'dbt/redshift/') - logging.info(f'Running the dbt script located at: {dpl_wd}/run_parsely_dpl.sh') - subprocess.call(dpl_wd + "run_parsely_dpl.sh " + dbt_profiles_dir + ' ' + dbt_target, shell=True, cwd=dpl_wd) + dbt_etl_script_loc = pkg_resources.resource_filename("parsely_raw_data", "dbt/redshift/run_parsely_dpl.sh") + dbt_etl_cwd = pkg_resources.resource_filename("parsely_raw_data", "dbt/redshift/") + logging.info(f'Running the dbt script located at: {dbt_etl_script_loc}') + subprocess.call(dbt_etl_script_loc + ' ' + dbt_profiles_dir + ' ' + dbt_target, shell=True, cwd=dbt_etl_cwd) def main(): @@ -95,6 +96,8 @@ def main(): help='The last day to process data from S3 to Redshift in the format YYYY-MM-DD') parser.add_argument('--dbt_profiles_dir', required=False, default=DBT_PROFILE_LOCATION, help='The location from root that contains the .dbt/profiles.yml file, example: /home/user/.dbt/') + parser.add_argument('--dbt_profile', required=False, default='parsely_dwh', + help='The name of the dbt profile located in the local /.dbt/profiles.yml file') parser.add_argument('--dbt_target', required=False, default=DBT_PROFILE_TARGET_NAME, help='The target ie. dev, prod, or test to use within the dbt profiles.yml file.') parser.add_argument('--create-table', action='store_true', default=True, @@ -102,7 +105,7 @@ def main(): args = parser.parse_args() # Reset dbt_profile to any updated settings: - settings_migration = migrate_settings() + settings_migration = migrate_settings(profile=args.dbt_profile, table=args.table_name) if not settings_migration: logging.warning("Settings not copied to dbt_profiles.yml successfully.") raise Exception("Settings not copied to dbt_profiles.yml successfully. Please edit default.py or copy the" diff --git a/dbt/redshift/run_parsely_dpl.sh b/parsely_raw_data/dbt/redshift/run_parsely_dpl.sh similarity index 100% rename from dbt/redshift/run_parsely_dpl.sh rename to parsely_raw_data/dbt/redshift/run_parsely_dpl.sh diff --git a/parsely_raw_data/dbt/redshift/settings/__init__.py b/parsely_raw_data/dbt/redshift/settings/__init__.py new file mode 100644 index 0000000..4802098 --- /dev/null +++ b/parsely_raw_data/dbt/redshift/settings/__init__.py @@ -0,0 +1,3 @@ +from .default import * +from .merge_settings_yaml import migrate_settings + diff --git a/dbt/redshift/settings/default.py b/parsely_raw_data/dbt/redshift/settings/default.py similarity index 100% rename from dbt/redshift/settings/default.py rename to parsely_raw_data/dbt/redshift/settings/default.py diff --git a/dbt/redshift/settings/default.py.schema b/parsely_raw_data/dbt/redshift/settings/default.py.schema similarity index 100% rename from dbt/redshift/settings/default.py.schema rename to parsely_raw_data/dbt/redshift/settings/default.py.schema diff --git a/dbt/redshift/settings/merge_settings_yaml.py b/parsely_raw_data/dbt/redshift/settings/merge_settings_yaml.py similarity index 72% rename from dbt/redshift/settings/merge_settings_yaml.py rename to parsely_raw_data/dbt/redshift/settings/merge_settings_yaml.py index 00096c7..2315ee8 100644 --- a/dbt/redshift/settings/merge_settings_yaml.py +++ b/parsely_raw_data/dbt/redshift/settings/merge_settings_yaml.py @@ -1,9 +1,10 @@ import yaml -from dbt.redshift.settings.default import * +import pkg_resources +from pathlib import Path + +from .default import * SETTINGS_VAR_MAPPING = [ - {'location': 'profile', 'settings': DBT_PROFILE_NAME}, - {'location': 'parsely:events', 'settings': PARSELY_RAW_DATA_TABLE}, {'location': 'parsely:timezone', 'settings': ETL_TIME_ZONE}, {'location': 'parsely:actions', 'settings': ETL_PARSELY_ACTIONS}, {'location': 'etl:keep_rawdata', 'settings': ETL_KEEP_RAW_DATA}, @@ -19,8 +20,13 @@ ] -def migrate_settings(): - with open(r'dbt/redshift/dbt_project.yml') as file: +def migrate_settings(profile=DBT_PROFILE_NAME, table=PARSELY_RAW_DATA_TABLE): + # because this is a package resource, have to reference it with pkg_resources + filepath = pkg_resources.resource_filename("parsely_raw_data", "dbt/redshift/dbt_project.yml") + SETTINGS_VAR_MAPPING.append({'location': 'profile', 'settings': profile}) + SETTINGS_VAR_MAPPING.append({'location': 'parsely:events', 'settings': table}) + + with open(filepath) as file: dbt_profile = yaml.load(file, Loader=yaml.FullLoader) for row in SETTINGS_VAR_MAPPING: @@ -31,7 +37,7 @@ def migrate_settings(): dbt_profile['vars'][row['location']] = str(row['settings']) continue - with open(r'dbt/redshift/dbt_project.yml', 'w') as file: + with open(filepath, 'w') as file: yaml.dump(dbt_profile, file, default_style='"') stored_successfully = True diff --git a/requirements.txt b/requirements.txt index 0be5471..6ca308a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,16 @@ boto3>=1.4.4,<1.17 botocore>=1.5.0,<1.20 -google-api-core>=1.16.0,<1.24.0 -protobuf>=3.6.0,<3.15 -google-api-python-client +dbt>=0.15.0,<0.18 +google-api-core<1.17.0,>=1.16.0 +protobuf<3.12,>=3.6.0 +google-api-python-client==1.8.0 +oauth2client==4.1.3 psycopg2cffi-compat -six -tablib -xlsxwriter -tabulate -oauth2client -pytest -pyyaml<=5.1 \ No newline at end of file +pytest==6.2.1 +python-dateutil==2.8.1 +pyyaml==5.3.1 +six==1.15.0 +sqlalchemy==1.3.20 +tablib==3.0.0 +tabulate==0.8.7 +xlsxwriter==1.3.7 diff --git a/setup.py b/setup.py index 4be9ca1..0b8419e 100644 --- a/setup.py +++ b/setup.py @@ -97,7 +97,7 @@ def run_setup(): 'parsely_s3 = parsely_raw_data.s3:main', 'parsely_stream = parsely_raw_data.stream:main', 'parsely_schema = parsely_raw_data.docgen:main', - 'parsely_redshift_etl = dbt.redshift.redshift_etl:main' + 'parsely_redshift_etl = parsely_raw_data.dbt.redshift.redshift_etl:main' ] }, install_requires=install_requires,