# Patch notes (commentary only; everything before the first "diff --git" header is not part of any hunk).
#
# * models/core/fct_lap_times.sql (new): selects the lap-time columns from stg_lap_times and adds
#   lap_times_id, a surrogate key built from race_id, driver_id and lap.
# * models/marts/mrt_lap_times_years.sql (new): left-joins fct_lap_times to dim_races on race_id to
#   attach race_year, keeping only rows whose lap_time_milliseconds is not null.
# * models/marts/aggregates/agg_lap_times_moving_avg_2.py (new): converts milliseconds to seconds,
#   averages per RACE_YEAR, adds a 5-row rolling mean (LAP_MOVING_AVG_5_YEARS) and rounds to 1 decimal.
# * covariate_encoding.py and ml_data_prep.py: pin pandas to 1.5.3 in dbt.config(packages=...).
#
# NOTE(review): the two new SQL files were spelled "*_lab_times*" while every ref() in this patch uses
# "*_lap_times*". They are renamed here so the refs resolve, assuming the dbt default where the model
# name is the file name. Confirm no other model with these names already exists in the project.
# NOTE(review): indentation inside the hunks was reconstructed; verify context lines against the
# target files before applying.
diff --git a/models/core/fct_lap_times.sql b/models/core/fct_lap_times.sql
new file mode 100644
index 0000000..37d62b0
--- /dev/null
+++ b/models/core/fct_lap_times.sql
@@ -0,0 +1,13 @@
+with lap_times as (
+    select
+        {{ dbt_utils.generate_surrogate_key(['race_id', 'driver_id', 'lap']) }} as lap_times_id,
+        race_id as race_id,
+        driver_id as driver_id,
+        lap as lap,
+        driver_position as driver_position,
+        lap_time_formatted as lap_time_formatted,
+        official_laptime as official_laptime,
+        lap_time_milliseconds as lap_time_milliseconds
+    from {{ ref('stg_lap_times') }}
+)
+select * from lap_times
\ No newline at end of file
diff --git a/models/marts/aggregates/agg_lap_times_moving_avg_2.py b/models/marts/aggregates/agg_lap_times_moving_avg_2.py
new file mode 100644
index 0000000..b1dc3ac
--- /dev/null
+++ b/models/marts/aggregates/agg_lap_times_moving_avg_2.py
@@ -0,0 +1,17 @@
+import pandas as pd
+
+def model(dbt, session):
+    # dbt configuration
+    dbt.config(packages=["pandas"])
+
+    # get upstream data
+    lap_times = dbt.ref("mrt_lap_times_years").to_pandas()
+
+    # describe the data
+    lap_times["LAP_TIME_SECONDS"] = lap_times["LAP_TIME_MILLISECONDS"]/1000
+    lap_time_trends = lap_times.groupby(by="RACE_YEAR")["LAP_TIME_SECONDS"].mean().to_frame()
+    lap_time_trends.reset_index(inplace=True)
+    lap_time_trends["LAP_MOVING_AVG_5_YEARS"] = lap_time_trends["LAP_TIME_SECONDS"].rolling(5).mean()
+    lap_time_trends.columns = lap_time_trends.columns.str.upper()
+
+    return lap_time_trends.round(1)
\ No newline at end of file
diff --git a/models/marts/mrt_lap_times_years.sql b/models/marts/mrt_lap_times_years.sql
new file mode 100644
index 0000000..b35db7e
--- /dev/null
+++ b/models/marts/mrt_lap_times_years.sql
@@ -0,0 +1,19 @@
+with lap_times as (
+select * from {{ ref('fct_lap_times') }}
+    ),
+    races as (
+    select * from {{ ref('dim_races') }}
+    ),
+    expanded_lap_times_by_year as (
+        select
+            lap_times.race_id,
+            driver_id,
+            race_year,
+            lap,
+            lap_time_milliseconds
+        from lap_times
+        left join races
+            on lap_times.race_id = races.race_id
+        where lap_time_milliseconds is not null
+    )
+    select * from expanded_lap_times_by_year
\ No newline at end of file
diff --git a/models/ml/prep_encoding_splitting/covariate_encoding.py b/models/ml/prep_encoding_splitting/covariate_encoding.py
index 3b44892..c42fbde 100644
--- a/models/ml/prep_encoding_splitting/covariate_encoding.py
+++ b/models/ml/prep_encoding_splitting/covariate_encoding.py
@@ -5,7 +5,7 @@
 
 def model(dbt, session):
     # dbt configuration
-    dbt.config(packages=["pandas","numpy","scikit-learn"])
+    dbt.config(packages=["pandas==1.5.3","numpy","scikit-learn"])
 
     # get upstream data
     data = dbt.ref("ml_data_prep").to_pandas()
diff --git a/models/ml/prep_encoding_splitting/ml_data_prep.py b/models/ml/prep_encoding_splitting/ml_data_prep.py
index 29f2eaf..1f9ef5b 100644
--- a/models/ml/prep_encoding_splitting/ml_data_prep.py
+++ b/models/ml/prep_encoding_splitting/ml_data_prep.py
@@ -2,7 +2,7 @@
 
 def model(dbt, session):
     # dbt configuration
-    dbt.config(packages=["pandas"])
+    dbt.config(packages=["pandas==1.5.3"])
 
     # get upstream data
     fct_results = dbt.ref("mrt_results_circuits").to_pandas()