1
+
2
+ from arraykit import shape_filter
3
+ from arraykit import resolve_dtype
4
+
5
+ import typing as tp
6
+ import numpy as np
7
+
8
+ #-------------------------------------------------------------------------------
9
class ErrorInitTypeBlocks(RuntimeError):
    '''Error raised by ``from_blocks`` when the supplied blocks cannot be combined.

    This name is neither defined nor imported in the visible file, so without this definition every error path in ``from_blocks`` would surface as a ``NameError``.
    '''


def from_blocks(
        raw_blocks: tp.Iterable[np.ndarray],
        ) -> tp.Tuple[tp.Tuple[tp.Optional[int], int], tp.List[tp.Tuple[int, int]]]:
    '''Simulation of legacy routine within TypeBlocks.

    Walks the blocks once, validating each one and recording, for every column, the pair (block position, column position within that block).

    Args:
        raw_blocks: iterable of NumPy arrays of at most two dimensions.

    Returns:
        A pair of ``(row_count, column_count)`` and the list of (block, column) pairs. ``row_count`` is None when ``raw_blocks`` is empty.

    Raises:
        ErrorInitTypeBlocks: if a block is not exactly an ``np.ndarray``, has more than two dimensions, or has a row count that differs from the previous block.
    '''
    index: tp.List[tp.Tuple[int, int]] = [] # columns position to blocks key
    block_count = 0
    row_count = None
    column_count = 0
    # NOTE(review): the resolved dtype is never returned; it is presumably kept so that the simulation performs the same work as the routine it models -- confirm.
    dtype = None

    for block in raw_blocks:
        # exact class match: ndarray subclasses are rejected too
        if block.__class__ is not np.ndarray:
            raise ErrorInitTypeBlocks(f'found non array block: {block} ')
        if block.ndim > 2:
            raise ErrorInitTypeBlocks(f'cannot include array with {block.ndim} dimensions')

        # presumably (rows, columns), with 1D arrays reported as one column -- confirm against arraykit
        r, c = shape_filter(block)

        if row_count is not None and r != row_count: #type: ignore [unreachable]
            raise ErrorInitTypeBlocks(f'mismatched row count: {r} : {row_count} ')
        row_count = r

        if c == 0:
            # zero-width blocks contribute a row count only; they are not
            # counted as a block and do not take part in dtype resolution
            continue

        if dtype is None:
            dtype = block.dtype
        else:
            dtype = resolve_dtype(dtype, block.dtype)

        index.extend((block_count, i) for i in range(c))
        column_count += c
        block_count += 1
    return (row_count, column_count), index
45
+
46
+ #-------------------------------------------------------------------------------
47
+
48
+
49
def cols_to_slice(indices: tp.Sequence[int]) -> slice:
    '''Build the slice that selects the same positions as a run of integers.

    The run is assumed to be contiguous and ordered (ascending or descending); only its first and last values are consulted. An empty sequence raises ``IndexError``.
    '''
    first = indices[0]
    if len(indices) > 1:
        final = indices[-1]
        if final <= first:
            # descending run: the exclusive stop sits one below the final
            # value; when the run ends at 0 that would be -1, which a slice
            # reads as "from the end", so an open stop (None) is used instead
            stop = None if final == 0 else final - 1
            return slice(first, stop, -1)
        # ascending run: the exclusive stop sits one past the final value
        return slice(first, final + 1)
    # a lone value is still expressed as a length-one slice
    return slice(first, first + 1)
66
+
67
def indices_to_contiguous_pairs(indices: tp.Iterable[tp.Tuple[int, int]]
        ) -> tp.Iterator[tp.Tuple[int, slice]]:
    '''Indices are pairs of (block_idx, value); convert these to pairs of (block_idx, slice) when we identify contiguous indices within a block (these are block slices).

    A run is extended only while the column keeps moving by one in the same direction within the same block. A change of block, a gap, a repeated column, or a reversal of direction closes the current run and starts a new one, so every input column is covered by exactly one emitted slice.

    Args:
        indices: iterable of (block_idx, column) pairs.

    Yields:
        (block_idx, slice) pairs, in input order. Nothing is yielded for empty input.
    '''
    run_block: tp.Optional[int] = None
    run: tp.List[int] = [] # columns of the run being accumulated
    step = 0 # direction of the run: 0 until a second column fixes it at +1 or -1

    for block_idx, col in indices:
        if run:
            delta = col - run[-1]
            if (block_idx == run_block
                    and abs(delta) == 1
                    and (step == 0 or delta == step)):
                # contiguous and still heading the same way; without the
                # direction check a sequence such as 1, 2, 1 would be
                # collapsed into one slice that drops columns
                step = delta
                run.append(col)
                continue
            # either new block, or not contiguous on same block
            yield (run_block, cols_to_slice(run))
        # start a new run
        run_block = block_idx
        run = [col]
        step = 0

    if run:
        yield (run_block, cols_to_slice(run))
96
+
97
+
98
class IterContiguous:
    '''Stateful conversion of (block, column) pairs into (block, slice) pairs.

    Each call to ``getter`` consumes pairs from the source until one contiguous run within a single block is complete, and returns that run as a slice. State between calls is held in integer attributes using sentinels:

    - ``last_block`` / ``last_column``: the most recently accepted pair; ``last_block`` is -1 until a first pair has been read.
    - ``next_block`` / ``next_column``: a pair that was read but belongs to the following run; ``next_block`` is -1 when nothing is pending and -2 once the source is exhausted.

    Block indices are assumed to be non-negative, as negative values would collide with the sentinels.
    '''

    def __init__(self, indices):
        self.indices = iter(indices)
        self.last_block = -1
        self.last_column = -1
        self.next_block = -1
        self.next_column = -1

    @staticmethod
    def build_slice(start, end_inclusive):
        '''Return the slice covering ``start`` through ``end_inclusive``, in either direction.
        '''
        # A single column is still returned as a length-one slice, not an
        # integer: slices are retained to force 2D selections.
        if start <= end_inclusive:
            return slice(start, end_inclusive + 1, None) # can be 1
        # reverse slice: an exclusive stop of -1 would mean "from the end",
        # so a run ending at column 0 uses an open stop
        if end_inclusive == 0:
            return slice(start, None, -1)
        return slice(start, end_inclusive - 1, -1)

    def getter(self) -> tp.Optional[tp.Tuple[int, slice]]:
        '''Return the next (block, slice) pair, or None when there are no more.

        A run is extended only while the column moves by one in a consistent direction within the same block. Empty input produces None on the first call.
        '''
        if self.next_block == -2:
            return None # terminate the loop

        slice_start = -1
        step = 0 # direction of this run: 0 until a second column fixes it

        if self.next_block != -1:
            # discontinuity found on last call, set new start
            self.last_block = self.next_block
            self.last_column = self.next_column
            slice_start = self.last_column
            self.next_block = -1 # clear next state
            self.next_column = -1

        while True:
            try:
                block, column = next(self.indices)
            except StopIteration:
                # flag for end on next call
                self.next_block = -2
                if self.last_block == -1:
                    # nothing was ever read: there is no run to emit
                    return None
                # emit the run that was started but not yet returned
                return self.last_block, self.build_slice(slice_start, self.last_column)

            if self.last_block == -1:
                # initialization
                self.last_block = block
                self.last_column = column
                slice_start = column
                continue

            delta = column - self.last_column
            if (self.last_block == block
                    and abs(delta) == 1
                    and (step == 0 or delta == step)): # contiguous
                # the direction check keeps a reversal (1, 2, 1) from being
                # folded into a single slice that would drop columns
                step = delta
                self.last_column = column
                continue

            # not contiguous, need to emit a slice for previous region;
            # store this block, column as next, so the following call
            # starts its run from it
            self.next_block = block
            self.next_column = column
            return self.last_block, self.build_slice(slice_start, self.last_column)

    def iter(self) -> tp.Iterator[tp.Tuple[int, slice]]:
        '''Yield (block, slice) pairs from ``getter`` until it returns None.
        '''
        while True:
            post = self.getter()
            if post is None:
                break
            yield post
165
+
166
+ #-------------------------------------------------------------------------------
167
+
168
+
169
+
170
if __name__ == '__main__':
    # Demonstration: for each sample of (block, column) pairs, print the
    # sample, then the result of the generator function, then the result of
    # the stateful class.
    samples = (
        [(0, 0), (0, 1), (0, 2), (1, 1), (1, 3), (2, 0), (3, 0), (3, 1), (3, 2)],
        [(0, 0), (2, 1), (3, 5), (10, 1)],
        [(0, 0), (2, 1), (2, 2), (2, 5), (2, 6), (10, 1)],
        [(10, 1)],
        [(0, 1), (0, 2), (0, 3), (0, 4)],
        [(0, 0), (2, 3), (2, 2), (2, 1), (2, 6), (10, 1)],
        [(2, 3), (0, 0), (2, 2), (2, 1), (2, 6), (2, 7)],
        [(2, 3), (2, 2), (5, 2), (5, 1), (5, 0), (2, 1), (2, 0)],
    )
    for pairs in samples:
        # the generator result is computed before anything is printed
        from_function = list(indices_to_contiguous_pairs(pairs))
        print(pairs)
        print(from_function)

        from_class = list(IterContiguous(pairs).iter())
        print(from_class)
0 commit comments