-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchunk.py
More file actions
133 lines (115 loc) · 4.55 KB
/
chunk.py
File metadata and controls
133 lines (115 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import ast
import json

import numpy as np
def blockshaped(arr, nrows, ncols):
    """
    Split a 2D array into equally sized sub-blocks.

    Return an array of shape (n, nrows, ncols) where
    n * nrows * ncols = arr.size.
    The returned array holds n subblocks, ordered left-to-right and then
    top-to-bottom, with each subblock preserving the "physical" layout of arr.

    Parameters:
        arr:   2D numpy array to split.
        nrows: number of rows in each sub-block.
        ncols: number of columns in each sub-block.

    Raises:
        ValueError: if arr is not 2D, or if its height/width is not an exact
            multiple of nrows/ncols.
    """
    h, w = arr.shape  # unpacking raises ValueError for anything that is not 2D

    # Without these checks a height that is not a multiple of nrows can still
    # reshape "successfully" (e.g. a 3x4 array into 2x2 blocks) and silently
    # return blocks that do not correspond to the original layout.
    if h % nrows != 0:
        raise ValueError(
            "array height %d is not divisible by nrows=%d" % (h, nrows))
    if w % ncols != 0:
        raise ValueError(
            "array width %d is not divisible by ncols=%d" % (w, ncols))

    # (block_row, row_in_block, block_col, col_in_block)
    #   -> swap the two middle axes so each block's rows become contiguous
    #   -> flatten the two leading block axes into one.
    return (arr.reshape(h // nrows, nrows, w // ncols, ncols)
               .swapaxes(1, 2)
               .reshape(-1, nrows, ncols))
# ---------------------------------------------------------------------------
# Script body: read the dump in "newfile.adm", cut every gridded variable of
# every record into 50x50 chunks, and write one new record per chunk (followed
# by the per-record metadata) to "chunks.adm".
# ---------------------------------------------------------------------------

GRID_ROWS = 900    # rows of each variable that are cut into chunks
GRID_COLS = 1800   # values per grid row
BLOCK_ROWS = 50    # rows per chunk
BLOCK_COLS = 50    # columns per chunk

# (field name in the dump, suffix appended to the fid, progress label)
VARIABLES = (
    ("analysed_sst", "_sst_", "sst:"),
    ("analysis_error", "_err_", "err:"),
    ("sea_ice_fraction", "_sif_", "sif:"),
)


def _find_bag(text, var_name, search_from):
    """Locate the `"<var_name>":{{ ... }}` value at or after search_from.

    Returns (key_start, data): key_start is the index of the key's opening
    quote, data is the text between the `{{` and the next `}}`.
    Raises ValueError if the key or its closing `}}` cannot be found.
    """
    opener = '"' + var_name + '":{{'
    key_start = text.find(opener, search_from)
    if key_start < 0:
        raise ValueError("field %r not found after offset %d"
                         % (var_name, search_from))
    data_start = key_start + len(opener)
    data_end = text.find('}}', data_start)
    if data_end < 0:
        raise ValueError("field %r is not terminated by '}}'" % var_name)
    return key_start, text[data_start:data_end]


def _split_last_row(data, row_len):
    """Split comma-separated data at the row_len-th comma from the end.

    Returns (head, tail): tail holds the final row_len values, head holds
    everything before them. Raises ValueError if data has too few values.
    """
    parts = data.rsplit(',', row_len)
    if len(parts) != row_len + 1:
        raise ValueError("expected more than %d comma-separated values"
                         % row_len)
    head = parts[0]
    return head, data[len(head) + 1:]


# Getting the dumped values from file. Read-only access is sufficient.
with open("newfile.adm", "r") as fadm:
    filecontent = fadm.read()

# Strip newlines. The original call used '/n' and discarded the result, so it
# had no effect at all.
filecontent = filecontent.replace('\n', '')

chunk_records = []  # one entry per emitted chunk record
meta_records = []   # one entry per input record

pos_uuid = filecontent.find("fid", 0)
while pos_uuid >= 0:
    # Process one record per iteration, anchored at its "fid" key.
    # NOTE(review): the +7 offset assumes exactly four characters sit between
    # the key name and the id value (presumably `": "`) - confirm against the
    # dump format.
    pos_uuid_end = filecontent.find('",', pos_uuid)
    uuid = filecontent[pos_uuid + 7:pos_uuid_end]

    # Every variable is searched from this record's fid so that each record
    # uses its own data (sea_ice_fraction used to be searched from offset 0).
    bags = [_find_bag(filecontent, name, pos_uuid) for name, _, _ in VARIABLES]

    # Metadata is everything from "fid" up to the character just before the
    # "analysed_sst" key (presumably the separating comma - TODO confirm).
    sst_start = bags[0][0]
    meta_records.append('{"' + filecontent[pos_uuid:sst_start - 1] + '}')

    # All three variables are chunked; the previous range(1,3) skipped the
    # third one (sea_ice_fraction).
    for (var_name, uuid_append, label), (_, data) in zip(VARIABLES, bags):
        print(label)
        print("length of s_temp file: %d" % len(data))

        # Separate the trailing row of GRID_COLS values from the
        # GRID_ROWS x GRID_COLS body.
        # NOTE(review): as in the original, that trailing row is split off
        # but never written to the output - confirm whether it should be.
        body_text, last_row_text = _split_last_row(data, GRID_COLS)
        print("count of commas")
        print(body_text.count(','))
        print(last_row_text.count(','))

        # SECURITY: the original used eval() on text read from the file.
        # ast.literal_eval accepts the same numeric list literal without
        # executing arbitrary code.
        big_grid = np.reshape(ast.literal_eval('[' + body_text + ']'),
                              (GRID_ROWS, GRID_COLS))

        # 900x1800 in 50x50 blocks yields 18 * 36 = 648 chunks. Iterate over
        # all of them and over all 50 rows of each (the old range(0,647) /
        # range(0,49) bounds dropped the last chunk and each chunk's last row).
        for index, block in enumerate(
                blockshaped(big_grid, BLOCK_ROWS, BLOCK_COLS)):
            # Serialise as a compact nested list, e.g. [[1,2],[3,4]], to
            # conform to the adm format. json.dumps avoids the duplicate
            # commas, wrapped lines and rounded floats that came from
            # rewriting numpy's printed form.
            values = json.dumps(block.tolist(), separators=(',', ':'))
            chunk_records.append(
                '{"fid":"' + uuid + uuid_append + str(index)
                + '", "' + var_name + '":' + values + '}')

    # Don't forget to advance to the next fid.
    pos_uuid = filecontent.find("fid", pos_uuid + 5)
    print(pos_uuid)

# Chunk records first, then the metadata records, exactly as before.
meta_content = ''.join(meta_records)
new_content = ''.join(chunk_records) + meta_content

print("writing to new file")
print(new_content)
with open("chunks.adm", "w") as finaladm:
    finaladm.write(new_content)