Skip to content

Commit

Permalink
Reader correctly handles postgres Arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
0xxon committed Jan 26, 2017
1 parent cbacc59 commit 62529f3
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 31 deletions.
78 changes: 61 additions & 17 deletions src/PostgresReader.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// See the file "COPYING" in the main distribution directory for copyright.

#include <algorithm> // for replace
#include <fstream>
#include <sstream>
#include <regex>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
Expand Down Expand Up @@ -168,27 +166,73 @@ std::unique_ptr<Value> PostgreSQL::EntryToVal(string s, const threading::Field*
// Then - initialization for table.
// Then - initialization for vector.
{
istringstream splitstream(s);
bool real_array = true;
static std::regex comma_re(",");
// (?: -> first group, non-marking. Group describes double-quoted syntax
// \" -> array element has to start with double quote
// (.*?) -> non-greedy capture (number 1) for content of element
// (?!\\\\)\" -> element ends with a double quote that is not escaped (no \ in front)
// (?:,|$) -> followed either by comma or end of string
// )
// |
// (?: -> second group, non-marking. Group describes non-double-quoted syntax
// ([^,{}\"\\\\]+?) -> non-greedy capture (number 2). Minimal length of 1 (zero-length has to
// be quoted). May not contain a number of special characters.
// (?:,|$) -> followed either by comma or end of string
// )
static std::regex elements_re("(?:\"(.*?)(?!\\\\)\"(?:,|$))|(?:([^,{}\"\\\\]+?)(?:,|$))");
static std::regex escaped_re("(?:\\\\(\\\\))|(?:\\\\(\"))");

// assume it is a real array. We don't really have a much better
// way to figure this out because the Postgres code that can easily tell us the
// SQL type lives in the backend and cannot easily be included here...
auto it = std::sregex_token_iterator(s.begin()+1, s.end()-1, elements_re, {1,2});
static std::sregex_token_iterator end;

// Not a postgres array. Just assume Bro-style comma-separated values.
if ( s.front() != '{' || s.back() != '}' )
{
real_array = false;
it = std::sregex_token_iterator(s.begin(), s.end(), comma_re, -1);
}

unique_ptr<Field> newfield(new Field(*field));
newfield->type = field->subtype;

std::vector<std::unique_ptr<Value>> vals;

while ( splitstream )
int match_number = 0;
while ( it != end )
{
string element;
match_number++;
if ( ! (*it).matched )
{
it++;
continue;
}

if ( !getline(splitstream, element, ',') )
break;
string element = *it;

auto newval = EntryToVal(element, newfield.get());
if ( newval == nullptr ) {
Error("Error while reading set");
return nullptr;
}
// real postgres array and double-quoted element -> unescape
if ( real_array && match_number % 2 == 1 )
element = std::regex_replace(element, escaped_re, "$1$2");

// real postgres array, element not double-quoted, string equals NULL -> real null
if ( real_array && match_number % 2 == 0 && element == "NULL" )
// note that this actually leads to problems at the moment downstream.
vals.emplace_back(new Value(field->subtype, false));
else
{
auto newval = EntryToVal(element, newfield.get());
if ( newval == nullptr )
{
Error("Error while reading set");
return nullptr;
}
vals.push_back(std::move(newval));
}

vals.push_back(std::move(newval));
it++;
}


Expand Down Expand Up @@ -264,9 +308,9 @@ bool PostgreSQL::DoUpdate()
ovals.emplace_back(std::unique_ptr<Value>(new Value(fields[j]->type, false)));
else
{
// str will be cleaned up by PQclear.
char *str = PQgetvalue(res, i, mapping[j]);
auto res = EntryToVal(str, fields[j]);
// PQgetvalue result will be cleaned up by PQclear.
string value (PQgetvalue(res, i, mapping[j]), PQgetlength(res, i, mapping[j]));
auto res = EntryToVal(value, fields[j]);
if ( res == nullptr )
{
// error occurred, let's break out of this line. Just removing ovals will get rid of everything.
Expand Down
3 changes: 1 addition & 2 deletions src/PostgresWriter.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// See the file "COPYING" in the main distribution directory for copyright.

#include <algorithm>
#include <string>
#include <errno.h>
#include <vector>
Expand Down Expand Up @@ -259,7 +258,7 @@ bool PostgreSQL::DoHeartbeat(double network_time, double current_time)

std::tuple<bool, string, int> PostgreSQL::CreateParams(const Value* val)
{
static std::regex curly_re("\\{|\"");
static std::regex curly_re("\\\\|\"");

if ( ! val->present )
return std::make_tuple(false, string(), 0);
Expand Down
24 changes: 19 additions & 5 deletions tests/Baseline/postgres.read-basic/out
Original file line number Diff line number Diff line change
@@ -1,13 +1,27 @@
[b=T, i=-42, e=SSH::LOG, c=21, p=123/unknown, sn=10.0.0.0/24, a=1.2.3.4, d=3.14, t=1454444233.58016, iv=100.0, s=hurz, sc={
2,
4,
1,
0,
3
}, ss={
'BB'},
'AA',
{'CC'
BB,
AA,
CC
}, se={

}, vc=[0, 20, 30], ve=[]]
}, vc=[10, 20, 30], ve=[]]
[b=T, i=-43, e=SSH::LOG, c=21, p=123/unknown, sn=10.0.0.0/24, a=1.2.3.4, d=3.14, t=1454444233.58016, iv=100.0, s=hurz, sc={
2,
4,
1,
3
}, ss={
{""},",
\"\{},
NULL,
,
"
}, se={

}, vc=[10, 20, 30], ve=[]]
End of data
4 changes: 2 additions & 2 deletions tests/Baseline/postgres.read-conn/out
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[ts=1300475167.09653, uid=CHhAvVGS1DHFjwGM9, id=[orig_h=141.142.220.202, orig_p=5353/unknown, resp_h=224.0.0.251, resp_p=5353/unknown], proto=udp, service=dns, duration=<uninitialized>, orig_bytes=<uninitialized>, resp_bytes=<uninitialized>, conn_state=S0, local_orig=<uninitialized>, local_resp=<uninitialized>, missed_bytes=0, history=D, orig_pkts=1, orig_ip_bytes=73, resp_pkts=0, resp_ip_bytes=0, tunnel_parents={
{a,
b}
b,
a
}]
[ts=1300475167.09701, uid=ClEkJM2Vm5giqnMf4h, id=[orig_h=fe80::217:f2ff:fed7:cf65, orig_p=5353/unknown, resp_h=ff02::fb, resp_p=5353/unknown], proto=udp, service=dns, duration=<uninitialized>, orig_bytes=<uninitialized>, resp_bytes=<uninitialized>, conn_state=S0, local_orig=<uninitialized>, local_resp=<uninitialized>, missed_bytes=0, history=D, orig_pkts=1, orig_ip_bytes=199, resp_pkts=0, resp_ip_bytes=0, tunnel_parents={

Expand Down
13 changes: 13 additions & 0 deletions tests/Baseline/postgres.read-no-real-array/out
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[b=T, i=-42, e=SSH::LOG, c=21, p=123/unknown, sn=10.0.0.0/24, a=1.2.3.4, d=3.14, t=1454444233.58016, iv=100.0, s=hurz, sc={
2,
4,
1,
3
}, ss={
BB,
AA,
CC
}, se={

}, vc=[10, 20, 30], ve=[]]
End of data
6 changes: 3 additions & 3 deletions tests/Baseline/postgres.write-basic/ssh.out
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
id|b|i|e|c|p|sn|a|d|t|iv|s|sc|ss|se|vc|ve|f
1|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485285824.55032|100|hurz|{2,4,1,3}|{BB,AA,CC}||{10,20,30}||SSHTest::foo
1|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485448463.53086|100|hurz|{2,4,1,3}|{BB,AA,CC}||{10,20,30}||SSHTest::foo
{
if (0 < SSHTest::i)
return (Foo);
else
return (Bar);

}
2|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485285824.55032|100|hurz|{2,4,1,3}|{"{\"\"hello","a b
cd~e","{{{{{}'","\""}||{10,20,30}||SSHTest::foo
2|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485448463.53086|100|hurz|{2,4,1,3}|{"{\"\"\\hello","a b
cd~e","\\\"\\{}","{{{{{}'","\""}||{10,20,30}||SSHTest::foo
{
if (0 < SSHTest::i)
return (Foo);
Expand Down
3 changes: 2 additions & 1 deletion tests/postgres/read-basic.bro
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ ALTER SEQUENCE ssh_id_seq OWNED BY ssh.id;
ALTER TABLE ONLY ssh ALTER COLUMN id SET DEFAULT nextval('ssh_id_seq'::regclass);

COPY ssh (id, b, i, e, c, p, sn, a, d, t, iv, s, sc, ss, se, vc, ve, f) FROM stdin;
1 t -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz {2,4,1,3} {'CC','AA','BB'} \N {10,20,30} \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n}
1 t -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz {2,4,1,3} {CC,AA,BB} \N {10,20,30} \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n}
2 t -43 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz {2,4,1,3} {"", "\\"", "{\\"\\"},\\"", "\\\\\\"\\\\{}", "NULL"} \N {10,20,30} \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n}
\.
SELECT pg_catalog.setval('ssh_id_seq', 1, true);
Expand Down
100 changes: 100 additions & 0 deletions tests/postgres/read-no-real-array.bro
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# @TEST-SERIALIZE: postgres
# @TEST-EXEC: initdb postgres
# @TEST-EXEC: perl -pi.bak -E "s/#port =.*/port = 7772/;" postgres/postgresql.conf
# @TEST-EXEC: pg_ctl start -D postgres -l serverlog
# @TEST-EXEC: sleep 5
# @TEST-EXEC: createdb -p 7772 testdb
# @TEST-EXEC: psql -p 7772 testdb < dump.sql || true
# @TEST-EXEC: btest-bg-run bro bro %INPUT
# @TEST-EXEC: btest-bg-wait 10 || true
# @TEST-EXEC: pg_ctl stop -D postgres -m fast
# @TEST-EXEC: btest-diff out

@TEST-START-FILE dump.sql
CREATE TABLE ssh (
id integer NOT NULL,
b boolean,
i integer,
e text,
c integer,
p integer,
sn inet,
a inet,
d double precision,
t double precision,
iv double precision,
s text,
sc text,
ss text,
se text,
vc text,
ve text,
f text
);

CREATE SEQUENCE ssh_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;

ALTER SEQUENCE ssh_id_seq OWNED BY ssh.id;
ALTER TABLE ONLY ssh ALTER COLUMN id SET DEFAULT nextval('ssh_id_seq'::regclass);

COPY ssh (id, b, i, e, c, p, sn, a, d, t, iv, s, sc, ss, se, vc, ve, f) FROM stdin;
1 t -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz 2,4,1,3 CC,AA,BB \N 10,20,30 \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n}
\.

SELECT pg_catalog.setval('ssh_id_seq', 1, true);
ALTER TABLE ONLY ssh
ADD CONSTRAINT ssh_id_key UNIQUE (id);

@TEST-END-FILE


redef exit_only_after_terminate = T;

global outfile: file;


# Record type handed to the input framework via $fields=InfoType in bro_init.
# The field names match the column names of the "ssh" table created in
# dump.sql; the "id" column has no counterpart here.
type InfoType: record {
b: bool;
i: int;
e: Log::ID;
c: count;
p: port;
sn: subnet;
a: addr;
d: double;
t: time;
iv: interval;
s: string;
# The set/vector fields below are backed by plain "text" columns in dump.sql
# (comma-separated values such as 2,4,1,3), not by postgres array columns.
sc: set[count];
ss: set[string];
se: set[string];
vc: vector of count;
ve: vector of string;
# The "f" column exists in the table but is deliberately left out of the record.
# f: function(i: count) : string;
};

# Event handler registered as $ev=line in the Input::add_event call in
# bro_init. Because that call sets $want_record=T, the data arrives as a
# single InfoType record in r.
event line(description: Input::EventDescription, tpe: Input::Event, r: InfoType)
{
# Dump the whole record to the output file.
print outfile, r;
}

# Startup handler: opens the output file and registers the PostgreSQL input
# stream that feeds the "line" event.
event bro_init()
{
# Presumably the file that the "btest-diff out" step in the test header checks.
outfile = open("../out");
# Read every row of the ssh table from database "testdb" on port 7772 (the
# port the test header configures for the temporary postgres instance).
Input::add_event([$source="select * from ssh;", $name="postgres", $fields=InfoType, $ev=line, $want_record=T,
$reader=Input::READER_POSTGRESQL, $config=table(["dbname"]="testdb", ["port"]="7772")]);
}

# Handler for the input framework's end_of_data event: writes a final marker
# line, closes the output file and shuts Bro down.
event Input::end_of_data(name: string, source:string)
{
print outfile, "End of data";
close(outfile);
# exit_only_after_terminate is redef'ed to T above, so presumably Bro would
# keep running without this explicit terminate() call.
terminate();
}


2 changes: 1 addition & 1 deletion tests/postgres/write-basic.bro
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ event bro_init()
$iv=100secs,
$s="hurz",
$sc=set(1,2,3,4),
$ss=set("\"", "{{{{{}'", "{\"\"\\hello", "a\tb\nc\rd\x01\x02\x03\x7Ee"),
$ss=set("\\\"\\{}", "\"", "{{{{{}'", "{\"\"\\hello", "a\tb\nc\rd\x01\x02\x03\x7Ee"),
$se=empty_set,
$vc=vector(10, 20, 30),
$ve=empty_vector,
Expand Down

0 comments on commit 62529f3

Please sign in to comment.