-
Notifications
You must be signed in to change notification settings - Fork 97
CSV uploads for ClickHouse Cloud #236
Changes from all commits
2b26552
7ad64a0
7fb8206
6b8935b
d1c025d
2cec411
a51c32e
847b80f
5554a61
eb3cdc7
490a558
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
(ns metabase.driver.clickhouse | ||
"Driver for ClickHouse databases" | ||
#_{:clj-kondo/ignore [:unsorted-required-namespaces]} | ||
(:require [clojure.string :as str] | ||
(:require [clojure.core.memoize :as memoize] | ||
[clojure.string :as str] | ||
[honey.sql :as sql] | ||
[metabase [config :as config]] | ||
[metabase.driver :as driver] | ||
[metabase.driver.clickhouse-introspection] | ||
|
@@ -12,8 +14,11 @@ | |
[metabase.driver.sql-jdbc [common :as sql-jdbc.common] | ||
[connection :as sql-jdbc.conn]] | ||
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute] | ||
[metabase.driver.sql.query-processor :as sql.qp] | ||
[metabase.driver.sql.util :as sql.u] | ||
[metabase.util.log :as log])) | ||
[metabase.upload :as upload] | ||
[metabase.util.log :as log]) | ||
(:import [com.clickhouse.jdbc.internal ClickHouseStatementImpl])) | ||
|
||
(set! *warn-on-reflection* true) | ||
|
||
|
@@ -32,17 +37,17 @@ | |
:test/jvm-timezone-setting false | ||
:connection-impersonation false | ||
:schemas true | ||
:datetime-diff true}] | ||
:datetime-diff true | ||
:upload-with-auto-pk false}] | ||
|
||
(defmethod driver/database-supports? [:clickhouse feature] [_driver _feature _db] supported?)) | ||
|
||
(def ^:private default-connection-details | ||
{:user "default" :password "" :dbname "default" :host "localhost" :port "8123"}) | ||
|
||
(defmethod sql-jdbc.conn/connection-details->spec :clickhouse | ||
[_ details] | ||
;; ensure defaults merge on top of nils | ||
(let [details (reduce-kv (fn [m k v] (assoc m k (or v (k default-connection-details)))) | ||
(defn- connection-details->spec* [details] | ||
(let [;; ensure defaults merge on top of nils | ||
details (reduce-kv (fn [m k v] (assoc m k (or v (k default-connection-details)))) | ||
default-connection-details | ||
details) | ||
{:keys [user password dbname host port ssl use-no-proxy]} details | ||
|
@@ -61,6 +66,33 @@ | |
:product_name product-name} | ||
(sql-jdbc.common/handle-additional-options details :separator-style :url)))) | ||
|
||
(def ^:private ^{:arglists '([db-details])} cloud? | ||
"Is this a cloud DB?" | ||
(memoize/ttl | ||
(fn [db-details] | ||
(sql-jdbc.execute/do-with-connection-with-options | ||
:clickhouse | ||
(connection-details->spec* db-details) | ||
nil | ||
(fn [^java.sql.Connection conn] | ||
(with-open [stmt (.prepareStatement conn "SELECT value='1' FROM system.settings WHERE name='cloud_mode'") | ||
rset (.executeQuery stmt)] | ||
(when (.next rset) | ||
(.getBoolean rset 1)))))) | ||
;; cache the results for 48 hours; TTL is here only to eventually clear out old entries | ||
:ttl/threshold (* 48 60 60 1000))) | ||
|
||
(defmethod sql-jdbc.conn/connection-details->spec :clickhouse | ||
[_ details] | ||
(cond-> (connection-details->spec* details) | ||
(cloud? details) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From what I understand, select_sequential_consistency just limits which replica you query from, and doesn't affect the performance of the query on that specific replica. This logic seems like it will break consistency for multi-replica on prem installations, and needlessly disable it on single replica ones. It seems best to just to enable it unconditionally for now? For any multi-replica installation, enabling it for all connections seems to wipe out most of the benefits of connecting to a distributed installation, but I don't see any way around this besides reworking metabase to support separate pools for different connection flavors. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you're mistaking the behaviour of There's more on SharedMergeTree in this blog post.
And I can't imagine fetching metadata from Keeper can be a bottleneck. I also got this DM from Serge on
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, For on-premise clusters (in a follow-up PR), we should just set |
||
;; select_sequential_consistency guarantees that we can query data from any replica in CH Cloud | ||
;; immediately after it is written | ||
(assoc :select_sequential_consistency true))) | ||
|
||
(defmethod driver/database-supports? [:clickhouse :uploads] [_driver _feature db] | ||
(cloud? (:details db))) | ||
|
||
(defmethod driver/can-connect? :clickhouse | ||
[driver details] | ||
(if config/is-test? | ||
|
@@ -112,6 +144,83 @@ | |
:semantic-version {:major (.getInt rset 2) | ||
:minor (.getInt rset 3)}}))))) | ||
|
||
(defmethod driver/upload-type->database-type :clickhouse | ||
[_driver upload-type] | ||
(case upload-type | ||
::upload/varchar-255 "Nullable(String)" | ||
::upload/text "Nullable(String)" | ||
::upload/int "Nullable(Int64)" | ||
::upload/float "Nullable(Float64)" | ||
::upload/boolean "Nullable(Boolean)" | ||
::upload/date "Nullable(Date32)" | ||
::upload/datetime "Nullable(DateTime64(3))" | ||
calherries marked this conversation as resolved.
Show resolved
Hide resolved
|
||
;; FIXME: should be `Nullable(DateTime64(3))` | ||
::upload/offset-datetime nil)) | ||
|
||
(defmethod driver/table-name-length-limit :clickhouse | ||
[_driver] | ||
;; FIXME: This is a lie because you're really limited by a filesystems' limits, because Clickhouse uses | ||
;; filenames as table/column names. But its an approximation | ||
206) | ||
|
||
(defn- quote-name [s] | ||
(let [parts (str/split (name s) #"\.")] | ||
(str/join "." (map #(str "`" % "`") parts)))) | ||
|
||
(defn- create-table!-sql | ||
"Creates a ClickHouse table with the given name and column definitions. It assumes the engine is MergeTree, | ||
so it only works with Clickhouse Cloud and single node on-premise deployments at the moment." | ||
[driver table-name column-definitions & {:keys [primary-key]}] | ||
(str/join "\n" | ||
[(first (sql/format {:create-table (keyword table-name) | ||
:with-columns (mapv (fn [[name type-spec]] | ||
(vec (cons name [[:raw type-spec]]))) | ||
column-definitions)} | ||
:quoted true | ||
:dialect (sql.qp/quote-style driver))) | ||
"ENGINE = MergeTree" | ||
(format "ORDER BY (%s)" (str/join ", " (map quote-name primary-key)))])) | ||
|
||
(defmethod driver/create-table! :clickhouse | ||
[driver db-id table-name column-definitions & {:keys [primary-key]}] | ||
(sql-jdbc.execute/do-with-connection-with-options | ||
driver | ||
db-id | ||
{:write? true} | ||
(fn [^java.sql.Connection conn] | ||
(with-open [stmt (.createStatement conn)] | ||
(let [^ClickHouseStatementImpl stmt (.unwrap stmt ClickHouseStatementImpl) | ||
request (.getRequest stmt)] | ||
(.set request "wait_end_of_query" "1") | ||
(with-open [_response (-> request | ||
(.query ^String (create-table!-sql driver table-name column-definitions :primary-key primary-key)) | ||
(.executeAndWait))])))))) | ||
|
||
(defmethod driver/insert-into! :clickhouse | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than use the default sql-jdbc implementation, I've chosen to use the approach recommended here, to allow for large uploads. |
||
[driver db-id table-name column-names values] | ||
(when (seq values) | ||
(sql-jdbc.execute/do-with-connection-with-options | ||
driver | ||
db-id | ||
{:write? true} | ||
(fn [^java.sql.Connection conn] | ||
(let [sql (format "INSERT INTO %s (%s)" (quote-name table-name) (str/join ", " (map quote-name column-names)))] | ||
(with-open [ps (.prepareStatement conn sql)] | ||
(doseq [row values] | ||
(when (seq row) | ||
(doseq [[idx v] (map-indexed (fn [x y] [(inc x) y]) row)] | ||
(condp isa? (type v) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably not a bottleneck, but making a protocol like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, how do you imagine this being implemented exactly? The tricky part is this multimethod doesn't know the type of each column without using reflection, and values can be
That way we'll only use reflection once for every column. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe I'm missing something, but I was thinking we'd just implement the protocol directly on each of these java types for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, anyway this seems pretty inconsequential compared to all the slow stuff we're doing on the Metabase side. I think I'd prefer to keep it simple and not change this |
||
java.lang.String (.setString ps idx v) | ||
java.lang.Boolean (.setBoolean ps idx v) | ||
java.lang.Long (.setLong ps idx v) | ||
java.lang.Double (.setFloat ps idx v) | ||
java.math.BigInteger (.setObject ps idx v) | ||
java.time.LocalDate (.setObject ps idx v) | ||
java.time.LocalDateTime (.setObject ps idx v) | ||
(.setString ps idx v))) | ||
(.addBatch ps))) | ||
(doall (.executeBatch ps)))))))) | ||
|
||
;;; ------------------------------------------ User Impersonation ------------------------------------------ | ||
|
||
(defmethod driver.sql/set-role-statement :clickhouse | ||
|
Uh oh!
There was an error while loading. Please reload this page.