diff --git a/mbs_results/estimation/pre_processing_estimation.py b/mbs_results/estimation/pre_processing_estimation.py index 036b5e89..1a8c45fd 100644 --- a/mbs_results/estimation/pre_processing_estimation.py +++ b/mbs_results/estimation/pre_processing_estimation.py @@ -1,4 +1,5 @@ from mbs_results.utilities.utils import read_colon_separated_file +from mbs_results.staging.data_cleaning import convert_cell_number def get_estimation_data( @@ -90,7 +91,7 @@ def derive_estimation_variables( reference: Str the name of the reference column **config: Dict - main pipeline configuration. Can be used to input the entire config dictionary + main pipeline configuration. Can be used to input the entire config dictionary Returns ------- @@ -99,6 +100,7 @@ def derive_estimation_variables( """ + population_frame = convert_cell_number(population_frame, cell_number) population_frame = population_frame.merge( calibration_group_map, on=[cell_number], how="left" diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py index e2db8c16..10b015f2 100644 --- a/mbs_results/staging/data_cleaning.py +++ b/mbs_results/staging/data_cleaning.py @@ -379,22 +379,23 @@ def convert_cell_number( cell_number: str ): """ - + Convert NI and GB cell numbers to UK by changing the first digit to 5 if this is 7. + Parameters ---------- df : pd.DataFrame - DESCRIPTION. - cell_number : TYPE - DESCRIPTION. - : TYPE - DESCRIPTION. + Dataframe with cell_number column to convert + cell_number : str + Column name for cell_number in df Returns ------- - None. + pd.DataFrame + Dataframe with converted cell_number column (and original cell_number + in separate column) """ - df["ni_uk_cell_number"] = df[cell_number] + df["ni_gb_cell_number"] = df[cell_number] df[cell_number] = ( df[cell_number] .astype(str) diff --git a/tests/data/staging/data_cleaning/test_convert_cell_number.csv b/tests/data/staging/data_cleaning/test_convert_cell_number.csv index 41673854..8e5874b1 100644 --- a/tests/data/staging/data_cleaning/test_convert_cell_number.csv +++ b/tests/data/staging/data_cleaning/test_convert_cell_number.csv @@ -1,4 +1,4 @@ -cell_number,ni_uk_cell_number +cell_number,ni_gb_cell_number 1111,1111 2222,2222 3333,3333 diff --git a/tests/staging/test_data_cleaning.py b/tests/staging/test_data_cleaning.py index f3839b01..7b079ae1 100644 --- a/tests/staging/test_data_cleaning.py +++ b/tests/staging/test_data_cleaning.py @@ -116,7 +116,7 @@ def test_convert_cell_number(filepath): expected_output = pd.read_csv(filepath / "test_convert_cell_number.csv") - df_in = expected_output.drop(columns=["cell_number"]).rename(columns = {"ni_uk_cell_number" : "cell_number"}) + df_in = expected_output.drop(columns=["cell_number"]).rename(columns = {"ni_gb_cell_number" : "cell_number"}) actual_output = convert_cell_number(df_in, "cell_number")