Skip to content

Commit

Permalink
Adding convert_cell_number to estimation and small changes to convert_cell_number function
Browse files Browse the repository at this point in the history
  • Loading branch information
lhubbardONS committed Jan 28, 2025
1 parent 27793a5 commit ada4855
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 11 deletions.
4 changes: 3 additions & 1 deletion mbs_results/estimation/pre_processing_estimation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from mbs_results.utilities.utils import read_colon_separated_file
from mbs_results.staging.data_cleaning import convert_cell_number


def get_estimation_data(
Expand Down Expand Up @@ -90,7 +91,7 @@ def derive_estimation_variables(
reference: Str
the name of the reference column
**config: Dict
main pipeline configuration. Can be used to input the entire config dictionary
main pipeline configuration. Can be used to input the entire config dictionary
Returns
-------
Expand All @@ -99,6 +100,7 @@ def derive_estimation_variables(
"""

population_frame = convert_cell_number(population_frame, cell_number)

population_frame = population_frame.merge(
calibration_group_map, on=[cell_number], how="left"
Expand Down
17 changes: 9 additions & 8 deletions mbs_results/staging/data_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,22 +379,23 @@ def convert_cell_number(
cell_number: str
):
"""
Convert NI and GB cell numbers to UK by changing the first digit to 5 if this is 7.
Parameters
----------
df : pd.DataFrame
DESCRIPTION.
cell_number : TYPE
DESCRIPTION.
: TYPE
DESCRIPTION.
Dataframe with cell_number column to convert
cell_number : str
Column name for cell_number in df
Returns
-------
None.
pd.DataFrame
Dataframe with converted cell_number column (and original cell_number
in separate column)
"""
df["ni_uk_cell_number"] = df[cell_number]
df["ni_gb_cell_number"] = df[cell_number]
df[cell_number] = (
df[cell_number]
.astype(str)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cell_number,ni_uk_cell_number
cell_number,ni_gb_cell_number
1111,1111
2222,2222
3333,3333
Expand Down
2 changes: 1 addition & 1 deletion tests/staging/test_data_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_convert_cell_number(filepath):

expected_output = pd.read_csv(filepath / "test_convert_cell_number.csv")

df_in = expected_output.drop(columns=["cell_number"]).rename(columns = {"ni_uk_cell_number" : "cell_number"})
df_in = expected_output.drop(columns=["cell_number"]).rename(columns = {"ni_gb_cell_number" : "cell_number"})

actual_output = convert_cell_number(df_in, "cell_number")

Expand Down

0 comments on commit ada4855

Please sign in to comment.