diff --git a/bench/encode.exs b/bench/encode.exs index 9569cc7..74763bc 100644 --- a/bench/encode.exs +++ b/bench/encode.exs @@ -20,21 +20,21 @@ types = [:u32, :string, :datetime, {:array, :string}] Benchee.run( %{ - "csv stream" => fn -> - rows |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run() - end, - "csv" => fn -> - NimbleCSV.RFC4180.dump_to_iodata(rows) - end, - "encode_rows + csv stream" => fn -> - rows |> Stream.map(&CSV.encode_row/1) |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run() - end, - "encode_rows + csv" => fn -> - rows |> CSV.encode_rows() |> NimbleCSV.RFC4180.dump_to_iodata() - end, - "row_binary stream" => fn -> - rows |> Stream.map(&RowBinary.encode_row(&1, types)) |> Stream.run() - end, + # "csv stream" => fn -> + # rows |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run() + # end, + # "csv" => fn -> + # NimbleCSV.RFC4180.dump_to_iodata(rows) + # end, + # "encode_rows + csv stream" => fn -> + # rows |> Stream.map(&CSV.encode_row/1) |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run() + # end, + # "encode_rows + csv" => fn -> + # rows |> CSV.encode_rows() |> NimbleCSV.RFC4180.dump_to_iodata() + # end, + # "row_binary stream" => fn -> + # rows |> Stream.map(&RowBinary.encode_row(&1, types)) |> Stream.run() + # end, "row_binary" => fn -> RowBinary.encode_rows(rows, types) end diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index 9a711b3..a2442cf 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -18,25 +18,29 @@ defmodule Ch.RowBinary do def encode_row([] = done, []), do: done def encode_rows([row | rows], types), do: encode_rows(row, types, rows, types) - def encode_rows([] = done, _types), do: done + def encode_rows([] = empty, _types), do: empty defp encode_rows([el | els], [t | ts], rows, types) do [encode(t, el) | encode_rows(els, ts, rows, types)] end - defp encode_rows([], [], rows, types), do: encode_rows(rows, types) + defp encode_rows([], [], [row | rows], types) do + encode_rows(row, types, rows, types) + end - def encode(:varint, num) when is_integer(num) and num < 128, do: <<num>> + defp encode_rows([], [], [] = done, _types), do: done - def encode(:varint, num) when is_integer(num) do - [<<1::1, num::7>> | encode(:varint, num >>> 7)] - end + @doc false + def encode_varint(num) when num <= 0x7F, do: num + def encode_varint(num), do: [num &&& 0x7F | encode_varint_cont(num >>> 7)] + defp encode_varint_cont(num) when num <= 0x7F, do: [num] + defp encode_varint_cont(num), do: [num &&& 0x7F | encode_varint_cont(num >>> 7)] def encode(type, str) when type in [:string, :binary] do case str do - _ when is_binary(str) -> [encode(:varint, byte_size(str)) | str] - _ when is_list(str) -> [encode(:varint, IO.iodata_length(str)) | str] - nil -> <<0>> + _ when is_binary(str) -> [encode_varint(byte_size(str)) | str] + _ when is_list(str) -> [encode_varint(IO.iodata_length(str)) | str] + nil -> 0 end end @@ -51,7 +55,7 @@ defmodule Ch.RowBinary do def encode(string(size: size), nil), do: <<0::size(size * 8)>> - for size <- [8, 16, 32, 64, 128, 256] do + for size <- [32, 64, 8, 16, 128, 256] do def encode(unquote(:"u#{size}"), i) when is_integer(i) do <<i::unquote(size)-little>> end @@ -72,34 +76,27 @@ defmodule Ch.RowBinary do def encode(unquote(:"f#{size}"), nil), do: <<0::unquote(size)>> end - def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp}) - when scale == -exp do - i = sign * coef - <<i::size(size)-little>> - end - - def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp}) - when exp >= 0 do - i = sign * coef * round(:math.pow(10, exp + scale)) - <<i::size(size)-little>> + def encode(:boolean, bool) do + case bool do + true -> 1 + false -> 0 + nil -> 0 + end end - def encode(decimal(scale: scale) = t, %Decimal{} = d) do - encode(t, Decimal.round(d, scale)) + def encode({:array, type}, l) do + case l do + [_ | _] -> [encode_varint(length(l)) | encode_many(type, l)] + [] -> 0 + nil -> 0 + end end - def encode(decimal(size: size), nil), do: <<0::size(size)>> - - def encode(:boolean, true), do: <<1>> - def encode(:boolean, false), do: <<0>> - def encode(:boolean, nil), do: <<0>> - - def encode({:array, type}, [_ | _] = l) do - [encode(:varint, length(l)) | encode_many(l, type)] + def encode(:date, %Date{} = date) do + <<Date.diff(date, @epoch_date)::16-little>> end - def encode({:array, _type}, []), do: <<0>> - def encode({:array, _type}, nil), do: <<0>> + def encode(:date, nil), do: <<0::16>> def encode(:datetime, %NaiveDateTime{} = datetime) do <<NaiveDateTime.diff(datetime, @epoch_naive_datetime)::32-little>> @@ -117,6 +114,24 @@ defmodule Ch.RowBinary do encode(t, v) end + def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp}) + when scale == -exp do + i = sign * coef + <<i::size(size)-little>> + end + + def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp}) + when exp >= 0 do + i = sign * coef * round(:math.pow(10, exp + scale)) + <<i::size(size)-little>> + end + + def encode(decimal(scale: scale) = t, %Decimal{} = d) do + encode(t, Decimal.round(d, scale)) + end + + def encode(decimal(size: size), nil), do: <<0::size(size)>> + def encode(datetime64(unit: unit), %NaiveDateTime{} = datetime) do <<NaiveDateTime.diff(datetime, @epoch_naive_datetime, unit)::64-little-signed>> end @@ -127,12 +142,6 @@ defmodule Ch.RowBinary do def encode(datetime64(), nil), do: <<0::64>> - def encode(:date, %Date{} = date) do - <<Date.diff(date, @epoch_date)::16-little>> - end - - def encode(:date, nil), do: <<0::16>> - def encode(:date32, %Date{} = date) do <<Date.diff(date, @epoch_date)::32-little-signed>> end @@ -158,10 +167,19 @@ defmodule Ch.RowBinary do def encode(:uuid, nil), do: <<0::128>> def encode({:nullable, _type}, nil), do: 1 - def encode({:nullable, type}, value), do: [0 | encode(type, value)] + def encode({:nullable, type}, value), do: [0 | ensure_iodata(encode(type, value))] + + defp encode_many(type, values) do + case values do + [el | rest] -> [encode(type, el) | encode_many(type, rest)] + [] = done -> done + end + end - defp encode_many([el | rest], type), do: [encode(type, el) | encode_many(rest, type)] - defp encode_many([] = done, _type), do: done + @compile inline: [ensure_iodata: 1] + defp ensure_iodata(b) when is_binary(b), do: b + defp ensure_iodata(l) when is_list(l), do: l + defp ensure_iodata(i) when is_integer(i), do: [i] @compile {:inline, d: 1} @@ -220,6 +238,7 @@ defmodule Ch.RowBinary do def encode_type({:nullable, type}), do: ["Nullable(", encode_type(type), ?)] def encode_type({:array, type}), do: ["Array(", encode_type(type), ?)] + def encode_type({:low_cardinality, type}), do: ["LowCardinality(", encode_type(type), ?)] def encode_type(:datetime), do: "DateTime" def encode_type({:datetime, timezone}) when is_binary(timezone) do diff --git a/test/ch/row_binary_test.exs b/test/ch/row_binary_test.exs index b781220..6d1a067 100644 --- a/test/ch/row_binary_test.exs +++ b/test/ch/row_binary_test.exs @@ -85,6 +85,11 @@ defmodule Ch.RowBinaryTest do {{:nullable, :string}, "string"} ] + for {type, expected} <- spec do + encoded = IO.iodata_to_binary(encode(type, expected)) |> IO.inspect() + assert decode_rows(encoded, [type]) == [[expected]] + end + num_cols = length(spec) {types, row} = Enum.unzip(spec) @@ -123,7 +128,8 @@ defmodule Ch.RowBinaryTest do test "nil" do assert encode({:nullable, :string}, nil) == 1 - assert encode(:string, nil) == <<0>> + assert encode({:nullable, :string}, "") == [0, 0 | ""] + assert encode(:string, nil) == 0 assert encode(string(size: 2), nil) == <<0, 0>> assert encode(:u8, nil) == <<0>> assert encode(:u16, nil) == <<0, 0>> @@ -135,8 +141,8 @@ defmodule Ch.RowBinaryTest do assert encode(:i64, nil) == <<0, 0, 0, 0, 0, 0, 0, 0>> assert encode(:f32, nil) == <<0, 0, 0, 0>> assert encode(:f64, nil) == <<0, 0, 0, 0, 0, 0, 0, 0>> - assert encode(:boolean, nil) == <<0>> - assert encode({:array, :string}, nil) == <<0>> + assert encode(:boolean, nil) == 0 + assert encode({:array, :string}, nil) == 0 assert encode(:date, nil) == <<0, 0>> assert encode(:date32, nil) == <<0, 0, 0, 0>> assert encode(:datetime, nil) == <<0, 0, 0, 0>>