-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathimpute_material_and_age.sql
51 lines (50 loc) · 1.93 KB
/
impute_material_and_age.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
-- get most common material by decade
-- most common material from 1920-1960 is cast iron
-- most common material from 1960-1970 is steel
-- these assumptions used below to build features.static_features_imputed
-- data very sparse -- we should revisit these assumptions
select decade, material
from
(
select decade, material, row_number() over(partition by decade order by streets desc) rank
from
(
select cast(install_year_original as int) / 10 * 10 decade, material, count(*) streets
from
(
select sf.*,
case when install_year = 0 or install_year is null then min_year else install_year end install_year_original
from features.static_features sf
left join features.street_min_year smy on smy.street_id = sf.street_id
where sf.material is not null
) temp
group by cast(install_year_original as int) / 10 * 10, material
) temp2
) temp3
where rank = 1;
drop table if exists features.static_features_imputed;
create table features.static_features_imputed as
select temp.street_id,
temp.diameters,
temp.rocktype1,
temp.rocktype2,
temp.musym,
temp.zone_name,
temp.install_year_original,
temp.install_year_parcel_data,
temp.install_year_raw,
case when material is null and install_year_original < 1920 then 'Cast_Iron'
when material is null and install_year_original >= 1970 then 'Ductile_Iron'
when material is null and install_year_original between 1920 and 1959 then 'Cast_Iron'
when material is null and install_year_original between 1960 and 1969 then 'Steel'
else material
end as material_imputed
from
(
select sf.*,
case when install_year != 0 and install_year is not null then install_year else null end install_year_raw,
case when install_year = 0 or install_year is null then min_year else install_year end install_year_original,
min_year install_year_parcel_data
from features.static_features sf
left join features.street_min_year smy on smy.street_id = sf.street_id
) temp;