diff --git a/src/PostgresReader.cc b/src/PostgresReader.cc index 38ac32b..9eb1c63 100644 --- a/src/PostgresReader.cc +++ b/src/PostgresReader.cc @@ -1,8 +1,6 @@ // See the file "COPYING" in the main distribution directory for copyright. -#include // for replace -#include -#include +#include #include #include #include @@ -168,27 +166,73 @@ std::unique_ptr PostgreSQL::EntryToVal(string s, const threading::Field* // Then - initialization for table. // Then - initialization for vector. { - istringstream splitstream(s); + bool real_array = true; + static std::regex comma_re(","); + // (?: -> first group, non-marking. Group describes double-quoted syntax + // \" -> array element has to start with double quote + // (.*?) -> non-greedy capture (number 1) for content of element + // (?!\\\\)\" -> element ends with a double quote that is not escaped (no \ in front) + // (?:,|$) -> followed either by comma or end of string + // ) + // | + // (?: -> second group, non-marking. Group describes non-double-quoted syntax + // ([^,{}\"\\\\]+?) -> non-greedy capture (number 2). Minimal length of 1 (zero-length has to + // be quoted). May not contain a number of special characters. + // (?:,|$) -> followed either by comma or end of string + // ) + static std::regex elements_re("(?:\"(.*?)(?!\\\\)\"(?:,|$))|(?:([^,{}\"\\\\]+?)(?:,|$))"); + static std::regex escaped_re("(?:\\\\(\\\\))|(?:\\\\(\"))"); + + // assume it is a real array. We don't really have a much better + // way to figure this out because the Postgres code that can easily tell us the + // SQL type lives in the backend and cannot easily be included here... + auto it = std::sregex_token_iterator(s.begin()+1, s.end()-1, elements_re, {1,2}); + static std::sregex_token_iterator end; + + // Oh Not a postgres array. Just assume Bro-style comma separated values. 
+ if ( s.front() != '{' || s.back() != '}' ) + { + real_array = false; + it = std::sregex_token_iterator(s.begin(), s.end(), comma_re, -1); + } unique_ptr newfield(new Field(*field)); newfield->type = field->subtype; std::vector> vals; - while ( splitstream ) + int match_number = 0; + while ( it != end ) { - string element; + match_number++; + if ( ! (*it).matched ) + { + it++; + continue; + } - if ( !getline(splitstream, element, ',') ) - break; + string element = *it; - auto newval = EntryToVal(element, newfield.get()); - if ( newval == nullptr ) { - Error("Error while reading set"); - return nullptr; - } + // real postgres array and double-quotes -> unescape + if ( real_array && match_number % 2 == 1 ) + element = std::regex_replace(element, escaped_re, "$1$2"); + + // real postgres array, no double-quotes, string equals null -> real null + if ( real_array && match_number % 2 == 0 && element == "NULL" ) + // note that this actually leads to problems at the moment downstream. + vals.emplace_back(new Value(field->subtype, false)); + else + { + auto newval = EntryToVal(element, newfield.get()); + if ( newval == nullptr ) + { + Error("Error while reading set"); + return nullptr; + } + vals.push_back(std::move(newval)); + } - vals.push_back(std::move(newval)); + it++; } @@ -264,9 +308,9 @@ bool PostgreSQL::DoUpdate() ovals.emplace_back(std::unique_ptr(new Value(fields[j]->type, false))); else { - // str will be cleaned up by PQclear. - char *str = PQgetvalue(res, i, mapping[j]); - auto res = EntryToVal(str, fields[j]); + // PQgetvalue result will be cleaned up by PQclear. + string value (PQgetvalue(res, i, mapping[j]), PQgetlength(res, i, mapping[j])); + auto res = EntryToVal(value, fields[j]); if ( res == nullptr ) { // error occured, let's break out of this line. Just removing ovals will get rid of everything. 
diff --git a/src/PostgresWriter.cc b/src/PostgresWriter.cc index e472760..b8d0500 100644 --- a/src/PostgresWriter.cc +++ b/src/PostgresWriter.cc @@ -1,6 +1,5 @@ // See the file "COPYING" in the main distribution directory for copyright. -#include #include #include #include @@ -259,7 +258,7 @@ bool PostgreSQL::DoHeartbeat(double network_time, double current_time) std::tuple PostgreSQL::CreateParams(const Value* val) { - static std::regex curly_re("\\{|\""); + static std::regex curly_re("\\\\|\""); if ( ! val->present ) return std::make_tuple(false, string(), 0); diff --git a/tests/Baseline/postgres.read-basic/out b/tests/Baseline/postgres.read-basic/out index d839116..1a503e4 100644 --- a/tests/Baseline/postgres.read-basic/out +++ b/tests/Baseline/postgres.read-basic/out @@ -1,13 +1,27 @@ [b=T, i=-42, e=SSH::LOG, c=21, p=123/unknown, sn=10.0.0.0/24, a=1.2.3.4, d=3.14, t=1454444233.58016, iv=100.0, s=hurz, sc={ +2, 4, 1, -0, 3 }, ss={ -'BB'}, -'AA', -{'CC' +BB, +AA, +CC }, se={ -}, vc=[0, 20, 30], ve=[]] +}, vc=[10, 20, 30], ve=[]] +[b=T, i=-43, e=SSH::LOG, c=21, p=123/unknown, sn=10.0.0.0/24, a=1.2.3.4, d=3.14, t=1454444233.58016, iv=100.0, s=hurz, sc={ +2, +4, +1, +3 +}, ss={ +{""},", +\"\{}, +NULL, +, +" +}, se={ + +}, vc=[10, 20, 30], ve=[]] End of data diff --git a/tests/Baseline/postgres.read-conn/out b/tests/Baseline/postgres.read-conn/out index c7ecf5e..78027ca 100644 --- a/tests/Baseline/postgres.read-conn/out +++ b/tests/Baseline/postgres.read-conn/out @@ -1,6 +1,6 @@ [ts=1300475167.09653, uid=CHhAvVGS1DHFjwGM9, id=[orig_h=141.142.220.202, orig_p=5353/unknown, resp_h=224.0.0.251, resp_p=5353/unknown], proto=udp, service=dns, duration=, orig_bytes=, resp_bytes=, conn_state=S0, local_orig=, local_resp=, missed_bytes=0, history=D, orig_pkts=1, orig_ip_bytes=73, resp_pkts=0, resp_ip_bytes=0, tunnel_parents={ -{a, -b} +b, +a }] [ts=1300475167.09701, uid=ClEkJM2Vm5giqnMf4h, id=[orig_h=fe80::217:f2ff:fed7:cf65, orig_p=5353/unknown, resp_h=ff02::fb, 
resp_p=5353/unknown], proto=udp, service=dns, duration=, orig_bytes=, resp_bytes=, conn_state=S0, local_orig=, local_resp=, missed_bytes=0, history=D, orig_pkts=1, orig_ip_bytes=199, resp_pkts=0, resp_ip_bytes=0, tunnel_parents={ diff --git a/tests/Baseline/postgres.read-no-real-array/out b/tests/Baseline/postgres.read-no-real-array/out new file mode 100644 index 0000000..2600e41 --- /dev/null +++ b/tests/Baseline/postgres.read-no-real-array/out @@ -0,0 +1,13 @@ +[b=T, i=-42, e=SSH::LOG, c=21, p=123/unknown, sn=10.0.0.0/24, a=1.2.3.4, d=3.14, t=1454444233.58016, iv=100.0, s=hurz, sc={ +2, +4, +1, +3 +}, ss={ +BB, +AA, +CC +}, se={ + +}, vc=[10, 20, 30], ve=[]] +End of data diff --git a/tests/Baseline/postgres.write-basic/ssh.out b/tests/Baseline/postgres.write-basic/ssh.out index 7e28669..a5edf8d 100644 --- a/tests/Baseline/postgres.write-basic/ssh.out +++ b/tests/Baseline/postgres.write-basic/ssh.out @@ -1,5 +1,5 @@ id|b|i|e|c|p|sn|a|d|t|iv|s|sc|ss|se|vc|ve|f -1|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485285824.55032|100|hurz|{2,4,1,3}|{BB,AA,CC}||{10,20,30}||SSHTest::foo +1|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485448463.53086|100|hurz|{2,4,1,3}|{BB,AA,CC}||{10,20,30}||SSHTest::foo { if (0 < SSHTest::i) return (Foo); @@ -7,8 +7,8 @@ else return (Bar); } -2|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485285824.55032|100|hurz|{2,4,1,3}|{"{\"\"hello","a b -c d~e","{{{{{}'","\""}||{10,20,30}||SSHTest::foo +2|t|-42|SSH::LOG|21|123|10.0.0.0/24|1.2.3.4|3.14|1485448463.53086|100|hurz|{2,4,1,3}|{"{\"\"\\hello","a b +c d~e","\\\"\\{}","{{{{{}'","\""}||{10,20,30}||SSHTest::foo { if (0 < SSHTest::i) return (Foo); diff --git a/tests/postgres/read-basic.bro b/tests/postgres/read-basic.bro index 4afd576..8a34b91 100644 --- a/tests/postgres/read-basic.bro +++ b/tests/postgres/read-basic.bro @@ -43,7 +43,8 @@ ALTER SEQUENCE ssh_id_seq OWNED BY ssh.id; ALTER TABLE ONLY ssh ALTER COLUMN id SET DEFAULT nextval('ssh_id_seq'::regclass); COPY ssh (id, b, i, e, c, 
p, sn, a, d, t, iv, s, sc, ss, se, vc, ve, f) FROM stdin; -1 t -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz {2,4,1,3} {'CC','AA','BB'} \N {10,20,30} \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n} +1 t -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz {2,4,1,3} {CC,AA,BB} \N {10,20,30} \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n} +2 t -43 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz {2,4,1,3} {"", "\\"", "{\\"\\"},\\"", "\\\\\\"\\\\{}", "NULL"} \N {10,20,30} \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n} \. SELECT pg_catalog.setval('ssh_id_seq', 1, true); diff --git a/tests/postgres/read-no-real-array.bro b/tests/postgres/read-no-real-array.bro new file mode 100644 index 0000000..d8455c5 --- /dev/null +++ b/tests/postgres/read-no-real-array.bro @@ -0,0 +1,100 @@ +# @TEST-SERIALIZE: postgres +# @TEST-EXEC: initdb postgres +# @TEST-EXEC: perl -pi.bak -E "s/#port =.*/port = 7772/;" postgres/postgresql.conf +# @TEST-EXEC: pg_ctl start -D postgres -l serverlog +# @TEST-EXEC: sleep 5 +# @TEST-EXEC: createdb -p 7772 testdb +# @TEST-EXEC: psql -p 7772 testdb < dump.sql || true +# @TEST-EXEC: btest-bg-run bro bro %INPUT +# @TEST-EXEC: btest-bg-wait 10 || true +# @TEST-EXEC: pg_ctl stop -D postgres -m fast +# @TEST-EXEC: btest-diff out + +@TEST-START-FILE dump.sql +CREATE TABLE ssh ( + id integer NOT NULL, + b boolean, + i integer, + e text, + c integer, + p integer, + sn inet, + a inet, + d double precision, + t double precision, + iv double precision, + s text, + sc text, + ss text, + se text, + vc text, + ve text, + f text +); + +CREATE SEQUENCE ssh_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE ssh_id_seq OWNED BY ssh.id; +ALTER TABLE ONLY ssh ALTER COLUMN id SET DEFAULT 
nextval('ssh_id_seq'::regclass); + +COPY ssh (id, b, i, e, c, p, sn, a, d, t, iv, s, sc, ss, se, vc, ve, f) FROM stdin; +1 t -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14000000000000012 1454444233.58016205 100 hurz 2,4,1,3 CC,AA,BB \N 10,20,30 \N SSHTest::foo\n{ \nif (0 < SSHTest::i) \n\treturn (Foo);\nelse\n\treturn (Bar);\n\n} +\. + +SELECT pg_catalog.setval('ssh_id_seq', 1, true); +ALTER TABLE ONLY ssh + ADD CONSTRAINT ssh_id_key UNIQUE (id); + +@TEST-END-FILE + + +redef exit_only_after_terminate = T; + +global outfile: file; + + +type InfoType: record { + b: bool; + i: int; + e: Log::ID; + c: count; + p: port; + sn: subnet; + a: addr; + d: double; + t: time; + iv: interval; + s: string; + sc: set[count]; + ss: set[string]; + se: set[string]; + vc: vector of count; + ve: vector of string; +# f: function(i: count) : string; +}; + +event line(description: Input::EventDescription, tpe: Input::Event, r: InfoType) + { + print outfile, r; + } + +event bro_init() + { + outfile = open("../out"); + Input::add_event([$source="select * from ssh;", $name="postgres", $fields=InfoType, $ev=line, $want_record=T, + $reader=Input::READER_POSTGRESQL, $config=table(["dbname"]="testdb", ["port"]="7772")]); + } + +event Input::end_of_data(name: string, source:string) + { + print outfile, "End of data"; + close(outfile); + terminate(); + } + + diff --git a/tests/postgres/write-basic.bro b/tests/postgres/write-basic.bro index 605979d..e376d5a 100644 --- a/tests/postgres/write-basic.bro +++ b/tests/postgres/write-basic.bro @@ -88,7 +88,7 @@ event bro_init() $iv=100secs, $s="hurz", $sc=set(1,2,3,4), - $ss=set("\"", "{{{{{}'", "{\"\"\\hello", "a\tb\nc\rd\x01\x02\x03\x7Ee"), + $ss=set("\\\"\\{}", "\"", "{{{{{}'", "{\"\"\\hello", "a\tb\nc\rd\x01\x02\x03\x7Ee"), $se=empty_set, $vc=vector(10, 20, 30), $ve=empty_vector,