diff --git a/data/children_act/10_pub_law_sdp_process.ipynb b/data/children_act/10_pub_law_sdp_process.ipynb index 4719e6f..ec74811 100644 --- a/data/children_act/10_pub_law_sdp_process.ipynb +++ b/data/children_act/10_pub_law_sdp_process.ipynb @@ -31,7 +31,7 @@ "#snapshot dates and publication period are set in the main run file. However, if running this notebook independently you will need to set them here\n", "#snapshot_date = \"2025-05-06\"\n", "#pub_year = 2025 #set the publication year\n", - "#pub_qtr = 1 #set the publication quarter" + "#pub_qtr = 3 #set the publication quarter" ] }, { @@ -108,10 +108,7 @@ "pydb.dataframe_to_temp_table(ca_apps_lookup, \"ca_apps_lookup\")\n", "\n", "ca_ords_lookup = pd.read_csv(f\"\"\"{folder_link}/ca_ords_lookup.csv\"\"\")\n", - "pydb.dataframe_to_temp_table(ca_ords_lookup, \"ca_ords_lookup\")\n", - "\n", - "ca_apps_lookup_v2 = pd.read_csv(f\"\"\"{folder_link}/ca_apps_lookup_v2.csv\"\"\")\n", - "pydb.dataframe_to_temp_table(ca_apps_lookup_v2, \"ca_apps_lookup_v2\")" + "pydb.dataframe_to_temp_table(ca_ords_lookup, \"ca_ords_lookup\")\n" ] }, { @@ -148,36 +145,7 @@ "ca_ords_breakdown = pd.read_csv(f\"\"\"{folder_link}/ca_ords_breakdown.csv\"\"\", keep_default_na = False, na_values = ['', 'NULL'])\n", "ca_ords_breakdown.columns = ca_ords_breakdown.columns.str.lower()\n", "ca_ords_breakdown = ca_ords_breakdown.astype({'qtr': 'str'})\n", - "\n", - "ca_ords_breakdown_orders = ca_ords_breakdown.loc[ca_ords_breakdown['disposal_type'] != 'Withdrawn', :]\n", - "ca_ords_breakdown_withdrawn = ca_ords_breakdown.loc[ca_ords_breakdown['disposal_type'] == 'Withdrawn', :]\n", - "#pydb.dataframe_to_temp_table(ca_ords_breakdown, \"ca_ords_breakdown\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [ - "# Putting them together as temporary tables\n", - "pydb.dataframe_to_temp_table(ca_ords_breakdown_orders, \"ca_ords_breakdown_orders\")\n", - "pydb.dataframe_to_temp_table(ca_ords_breakdown_withdrawn, \"ca_ords_breakdown_withdrawn\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d05aefad-9dec-4fe8-8974-1f38e4a89bc3", - "metadata": {}, - "outputs": [], - "source": [ - "# CCD Disposals\n", - "ca_ccd_disp_breakdown = pd.read_csv(f\"\"\"{folder_link}/ca_ccd_disp_breakdown.csv\"\"\", keep_default_na = False, na_values = ['', 'NULL'])\n", - "ca_ccd_disp_breakdown.columns = ca_ccd_disp_breakdown.columns.str.lower()\n", - "ca_ccd_disp_breakdown = ca_ccd_disp_breakdown.astype({'qtr': 'str'})\n", - "pydb.dataframe_to_temp_table(ca_ccd_disp_breakdown, \"ca_ccd_disp_breakdown\")" + "pydb.dataframe_to_temp_table(ca_ords_breakdown, \"ca_ords_breakdown\")\n" ] }, { @@ -220,7 +188,7 @@ "t1.Disposal_type,\n", "t2.order_desc,\n", "t2.order_type_code,\n", - "t1.Gender as Sex,\n", + "t1.Gender,\n", "t1.age_band,\n", "t1.Applicants_in_case,\n", "t1.Respondents_in_case,\n", @@ -235,7 +203,7 @@ "t1.Disposal_type,\n", "t2.order_desc,\n", "t2.order_type_code,\n", - "t1.Gender as Sex,\n", + "t1.Gender,\n", "t1.age_band,\n", "t1.Applicants_in_case,\n", "t1.Respondents_in_case,\n", @@ -283,7 +251,7 @@ "SUM(t1.Count) as count\n", "\n", "\n", - "FROM __temp__.ca_ords_breakdown_orders t1\n", + "FROM __temp__.ca_ords_breakdown t1\n", "LEFT JOIN __temp__.ca_ords_lookup t2\n", "ON t1.OrderMadeTypeKey = t2.OrderTypeKey\n", "\n", @@ -297,7 +265,7 @@ "t2.order_desc,\n", "CASE WHEN t1.Disposal_type IN ('Order made', 'Interim Order') THEN t2.order_type_code\n", "ELSE NULL END,\n", - "t1.Gender as Sex,\n", + "t1.Gender,\n", "t1.age_band,\n", "t1.Applicants_in_case,\n", "t1.Respondents_in_case,\n", @@ -313,7 +281,7 @@ "t2.order_desc,\n", "CASE WHEN t1.Disposal_type IN ('Order made', 'Interim Order') THEN t2.order_type_code\n", "ELSE NULL END,\n", - "t1.Gender as Sex,\n", + "t1.Gender,\n", "t1.age_band,\n", "t1.Applicants_in_case,\n", "t1.Respondents_in_case,\n", @@ -333,69 +301,6 @@ "#pydb.read_sql_query('SELECT * FROM __temp__.ca_ords_join')" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "15", - "metadata": {}, - "outputs": [], - "source": [ - "#Joining withdrawn orders breakdown to application lookup\n", - "pydb.create_temp_table(\n", - "f\"\"\"\n", - "SELECT\n", - "t1.Year,\n", - "t1.Qtr,\n", - "t1.Type,\n", - "t1.count_type,\n", - "t1.Public_private,\n", - "t1.Disposal_type,\n", - "t2.order_desc as Order_type,\n", - "CAST(NULL AS INT) as order_type_code,\n", - "t1.Gender as Sex,\n", - "t1.age_band,\n", - "t1.Applicants_in_case,\n", - "t1.Respondents_in_case,\n", - "t1.HC_Indicator,\n", - "SUM(t1.Count) as count\n", - "\n", - "\n", - "FROM __temp__.ca_ords_breakdown_withdrawn t1\n", - "LEFT JOIN __temp__.ca_apps_lookup_v2 t2\n", - "ON t1.order_type = t2.order_type\n", - "\n", - "GROUP BY\n", - "t1.Year,\n", - "t1.Qtr,\n", - "t1.Type,\n", - "t1.count_type,\n", - "t1.Public_private,\n", - "t1.Disposal_type,\n", - "t2.order_desc,\n", - "t1.Gender as Sex,\n", - "t1.age_band,\n", - "t1.Applicants_in_case,\n", - "t1.Respondents_in_case,\n", - "t1.HC_Indicator\n", - "\n", - "ORDER BY\n", - "t1.Year,\n", - "t1.Qtr,\n", - "t1.Type,\n", - "t1.count_type,\n", - "t1.Public_private,\n", - "t1.Disposal_type,\n", - "t2.order_desc,\n", - "t1.Gender as Sex,\n", - "t1.age_band,\n", - "t1.Applicants_in_case,\n", - "t1.Respondents_in_case,\n", - "t1.HC_Indicator\n", - "\n", - "\"\"\",\n", - "\"ca_ords_withdrawn_join\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -406,67 +311,6 @@ "#pydb.read_sql_query('SELECT * FROM __temp__.ca_ords_withdrawn_join')" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "3cc80726-61c8-4e62-b86c-89249e6e3468", - "metadata": {}, - "outputs": [], - "source": [ - "# Joining CCD Disposals together\n", - "pydb.create_temp_table(f\"\"\"SELECT\n", - "t1.Year,\n", - "t1.Qtr,\n", - "t1.Type,\n", - "t1.count_type,\n", - "t1.Public_private,\n", - "t1.Disposal_type,\n", - "t2.order_desc as Order_type,\n", - "CAST(NULL AS INT) as order_type_code,\n", - "t1.Gender as Sex,\n", - "t1.age_band,\n", - "t1.Applicants_in_case,\n", - "t1.Respondents_in_case,\n", - "t1.HC_Indicator,\n", - "SUM(t1.Count) as count\n", - "\n", - "\n", - "FROM __temp__.ca_ccd_disp_breakdown t1\n", - "LEFT JOIN __temp__.ca_apps_lookup t2\n", - "ON t1.applicationtypekey = t2.applicationtypekey\n", - "\n", - "GROUP BY\n", - "t1.Year,\n", - "t1.Qtr,\n", - "t1.Type,\n", - "t1.count_type,\n", - "t1.Public_private,\n", - "t1.Disposal_type,\n", - "t2.order_desc,\n", - "t1.Gender as Sex,\n", - "t1.age_band,\n", - "t1.Applicants_in_case,\n", - "t1.Respondents_in_case,\n", - "t1.HC_Indicator\n", - "\n", - "ORDER BY\n", - "t1.Year,\n", - "t1.Qtr,\n", - "t1.Type,\n", - "t1.count_type,\n", - "t1.Public_private,\n", - "t1.Disposal_type,\n", - "t2.order_desc,\n", - "t1.Gender as Sex,\n", - "t1.age_band,\n", - "t1.Applicants_in_case,\n", - "t1.Respondents_in_case,\n", - "t1.HC_Indicator\n", - "\n", - "\"\"\",\n", - "\"ca_ccd_disposals_join\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -481,8 +325,6 @@ "UNION ALL\n", "SELECT * FROM __temp__.ca_ords_join\n", "UNION ALL\n", - "SELECT * FROM __temp__.ca_ords_withdrawn_join\n", - "UNION ALL\n", "SELECT * FROM __temp__.ca_cases_child\n", "UNION ALL\n", "SELECT * FROM __temp__.combined_orders\n", @@ -544,7 +386,7 @@ "t1.Disposal_type,\n", "t1.Order_type,\n", "CAST(order_type_code AS INT) as order_type_code,\n", - "t1.Gender as Sex,\n", + "t1.Sex,\n", "t1.age_band,\n", "t1.Applicants_in_case,\n", "t1.Respondents_in_case,\n", @@ -558,6 +400,50 @@ "WHERE Public_private = 'Private law'\n", ") t1 \n", "\"\"\",\n", + "\"ca_csv_v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba534bf8-ed7d-4508-a378-2d2d2998add8", + "metadata": {}, + "outputs": [], + "source": [ + "# Combining SDP Public Law with only Private Law FamilyMan filtering out unneeded data\n", + "ca_csv_combine_sdp = pydb.create_temp_table(\n", + "f\"\"\"\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code not in (29, 30, 31, 32, 34) AND Public_private = 'Public law'\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code IS NULL AND Public_private = 'Public law' \n", + "AND order_type not in ('Section 8 Contact Order', 'Section 8 Residence Order', 'Section 8 Prohibited Steps Order', 'Section 8 Specific Issue Order', 'Financial Application')\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code in (29, 30, 31, 32) and type = 'Application' and year < 2021 AND Public_private = 'Public law'\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code in (29, 30, 31, 32) and type = 'Disposal' and Public_private = 'Public law' and\n", + "((year < 2021) or (year > 2024) or (year = 2024 and qtr = '4'))\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code = 34 and type = 'Disposal' and year < 2023 and Public_private = 'Public law'\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE Public_private = 'Private law' and order_type_code <> 34\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE Public_private = 'Private law' and order_type_code = 34 and year < 2023\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code IS NULL AND Public_private = 'Private law' and order_type <> 'Financial Application'\n", + "UNION ALL\n", + "SELECT * FROM __temp__.ca_csv_v1\n", + "WHERE order_type_code IS NULL AND Public_private = 'Private law' and order_type = 'Financial Application' and year < 2023\n", + "\n", + "\n", + "\"\"\",\n", "\"ca_csv_sdp\")" ] }, @@ -602,7 +488,7 @@ "outputs": [], "source": [ "print(\"exporting main ca csv to s3....\")\n", - "ca_csv_df.to_csv (r's3://alpha-family-data/fcsq_processing/children_act_outputs/CSV Children Act.csv', header = True, index = False)" + "ca_csv_df.to_csv (r's3://alpha-family-data/fcsq_processing/children_act_outputs/CSV Test Children Act.csv', header = True, index = False)" ] }, { @@ -838,7 +724,7 @@ ], "metadata": { "kernelspec": { - "display_name": "fcsq_venv", + "display_name": "FCSQ_run", "language": "python", "name": "venv_fcsq" },