Skip to content

Commit 620ba0c

Browse files
committed
source-postgres: Add 'flatten_arrays' flag
Currently source-postgres captures array columns as a JSON object with 'dimensions' and 'elements' properties, each of which is an array. This is because PostgreSQL arrays are inherently multidimensional, and we're trying to preserve them as faithfully as possible. But it turns out that nobody ever wants that; they just want simple, boring one-dimensional arrays of values to be translated into JSON arrays of equivalent values. So we should do that by default going forward.
1 parent 60033ae commit 620ba0c

8 files changed

+482
-6
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# ================================
2+
# Collection "acmeCo/test/test/featureflagflattenarrays_70143951": 4 Documents
3+
# ================================
4+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":1,"int_array":{"dimensions":[3],"elements":[1,2,3]},"nested_array":{"dimensions":[2,2],"elements":[1,2,3,4]},"text_array":{"dimensions":[3],"elements":["a","b","c"]}}
5+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":2,"int_array":{"dimensions":[2],"elements":[10,20]},"nested_array":{"dimensions":[2,2],"elements":[5,6,7,8]},"text_array":{"dimensions":[2],"elements":["foo","bar"]}}
6+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":3,"int_array":{"dimensions":[],"elements":[]},"nested_array":{"dimensions":[],"elements":[]},"text_array":{"dimensions":[],"elements":[]}}
7+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":4,"int_array":null,"nested_array":null,"text_array":null}
8+
# ================================
9+
# Final State Checkpoint
10+
# ================================
11+
{"bindingStateV1":{"test%2Ffeatureflagflattenarrays_70143951":{"backfilled":4,"key_columns":["id"],"mode":"Active"}},"cursor":"0/1111111"}
12+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
Binding 0:
2+
{
3+
"recommended_name": "test/featureflagflattenarrays_70143951",
4+
"resource_config_json": {
5+
"namespace": "test",
6+
"stream": "featureflagflattenarrays_70143951"
7+
},
8+
"document_schema_json": {
9+
"$defs": {
10+
"TestFeatureflagflattenarrays_70143951": {
11+
"type": "object",
12+
"required": [
13+
"id"
14+
],
15+
"$anchor": "TestFeatureflagflattenarrays_70143951",
16+
"properties": {
17+
"id": {
18+
"type": "integer",
19+
"description": "(source type: non-nullable int4)"
20+
},
21+
"int_array": {
22+
"required": [
23+
"dimensions",
24+
"elements"
25+
],
26+
"description": "(source type: _int4)",
27+
"properties": {
28+
"dimensions": {
29+
"items": {
30+
"type": "integer"
31+
},
32+
"type": "array"
33+
},
34+
"elements": {
35+
"items": {
36+
"type": [
37+
"integer",
38+
"null"
39+
]
40+
},
41+
"type": "array"
42+
}
43+
},
44+
"type": [
45+
"object",
46+
"null"
47+
]
48+
},
49+
"nested_array": {
50+
"required": [
51+
"dimensions",
52+
"elements"
53+
],
54+
"description": "(source type: _int4)",
55+
"properties": {
56+
"dimensions": {
57+
"items": {
58+
"type": "integer"
59+
},
60+
"type": "array"
61+
},
62+
"elements": {
63+
"items": {
64+
"type": [
65+
"integer",
66+
"null"
67+
]
68+
},
69+
"type": "array"
70+
}
71+
},
72+
"type": [
73+
"object",
74+
"null"
75+
]
76+
},
77+
"text_array": {
78+
"required": [
79+
"dimensions",
80+
"elements"
81+
],
82+
"description": "(source type: _text)",
83+
"properties": {
84+
"dimensions": {
85+
"items": {
86+
"type": "integer"
87+
},
88+
"type": "array"
89+
},
90+
"elements": {
91+
"items": {
92+
"type": [
93+
"string",
94+
"null"
95+
]
96+
},
97+
"type": "array"
98+
}
99+
},
100+
"type": [
101+
"object",
102+
"null"
103+
]
104+
}
105+
}
106+
}
107+
},
108+
"allOf": [
109+
{
110+
"if": {
111+
"properties": {
112+
"_meta": {
113+
"properties": {
114+
"op": {
115+
"const": "d"
116+
}
117+
}
118+
}
119+
}
120+
},
121+
"then": {
122+
"reduce": {
123+
"delete": true,
124+
"strategy": "merge"
125+
}
126+
},
127+
"else": {
128+
"reduce": {
129+
"strategy": "merge"
130+
}
131+
},
132+
"required": [
133+
"_meta"
134+
],
135+
"properties": {
136+
"_meta": {
137+
"type": "object",
138+
"required": [
139+
"op",
140+
"source"
141+
],
142+
"properties": {
143+
"before": {
144+
"$ref": "#TestFeatureflagflattenarrays_70143951",
145+
"description": "Record state immediately before this change was applied.",
146+
"reduce": {
147+
"strategy": "firstWriteWins"
148+
}
149+
},
150+
"op": {
151+
"enum": [
152+
"c",
153+
"d",
154+
"u"
155+
],
156+
"description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete."
157+
},
158+
"source": {
159+
"$id": "https://github.com/estuary/connectors/source-postgres/postgres-source",
160+
"properties": {
161+
"ts_ms": {
162+
"type": "integer",
163+
"description": "Unix timestamp (in millis) at which this event was recorded by the database."
164+
},
165+
"schema": {
166+
"type": "string",
167+
"description": "Database schema (namespace) of the event."
168+
},
169+
"snapshot": {
170+
"type": "boolean",
171+
"description": "Snapshot is true if the record was produced from an initial table backfill and unset if produced from the replication log."
172+
},
173+
"table": {
174+
"type": "string",
175+
"description": "Database table of the event."
176+
},
177+
"loc": {
178+
"items": {
179+
"type": "integer"
180+
},
181+
"type": "array",
182+
"maxItems": 3,
183+
"minItems": 3,
184+
"description": "Location of this WAL event as [last Commit.EndLSN; event LSN; current Begin.FinalLSN]. See https://www.postgresql.org/docs/current/protocol-logicalrep-message-formats.html"
185+
},
186+
"txid": {
187+
"type": "integer",
188+
"description": "The 32-bit transaction ID assigned by Postgres to the commit which produced this change."
189+
}
190+
},
191+
"type": "object",
192+
"required": [
193+
"schema",
194+
"table",
195+
"loc"
196+
]
197+
}
198+
},
199+
"reduce": {
200+
"strategy": "merge"
201+
}
202+
}
203+
}
204+
},
205+
{
206+
"$ref": "#TestFeatureflagflattenarrays_70143951"
207+
}
208+
]
209+
},
210+
"key": [
211+
"/id"
212+
]
213+
}
214+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# ================================
2+
# Collection "acmeCo/test/test/featureflagflattenarrays_70143951": 4 Documents
3+
# ================================
4+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":1,"int_array":[1,2,3],"nested_array":[1,2,3,4],"text_array":["a","b","c"]}
5+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":2,"int_array":[10,20],"nested_array":[5,6,7,8],"text_array":["foo","bar"]}
6+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":3,"int_array":[],"nested_array":[],"text_array":[]}
7+
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"featureflagflattenarrays_70143951","loc":[11111111,11111111,11111111]}},"id":4,"int_array":null,"nested_array":null,"text_array":null}
8+
# ================================
9+
# Final State Checkpoint
10+
# ================================
11+
{"bindingStateV1":{"test%2Ffeatureflagflattenarrays_70143951":{"backfilled":4,"key_columns":["id"],"mode":"Active"}},"cursor":"0/1111111"}
12+

0 commit comments

Comments
 (0)