Each party waives its rights to a jury trial in any resulting litigation. diff --git a/ b/ new file mode 100644 index 0000000..a95814b --- /dev/null +++ b/ @@ -0,0 +1,68 @@ +# Metabase Teradata Driver (Community-Supported) + +The Metabase Teradata driver allows Metabase v0.32.7 or above to +connect to [Teradata]( databases. +Instructions for installing it can be found below. + +This driver is community-supported and is not considered part of the +core Metabase project. If you would like to open a GitHub issue to +report a bug or request new features, or would like to open a pull +request against it, please do so in this repository, and not in the +core Metabase GitHub repository. + +## Obtaining the Teradata Driver + +### Where to find it + +[Click here]( to view the latest release of the Metabase Teradata driver; click the link to download `teradata.metabase-driver.jar`. + +You can find past releases of the Teradata driver [here]( + + +### How to Install it + +Metabase will automatically make the Teradata driver available if it finds the driver and the proprietary JDBC JARs in the Metabase plugins directory when it starts up. +All you need to do is create the directory `plugins` (if it's not already there), move the driver JAR you just downloaded into it together with the Teradata JDBC JARs (`terajdbc4.jar` and `tdgssconfig.jar`), and restart Metabase. + +By default, the plugins directory is called `plugins`, and lives in the same directory as the Metabase JAR. 
+ +For example, if you're running Metabase from a directory called `/app/`, you should move the Teradata driver and the proprietary jdbc JARs to `/app/plugins/`: + +```bash +# example directory structure for running Metabase with Teradata support +/app/metabase.jar +/app/plugins/teradata.metabase-driver.jar +/app/plugins/tdgssconfig.jar +/app/plugins/terajdbc4.jar +``` + +If you're running Metabase from the Mac App, the plugins directory defaults to `~/Library/Application Support/Metabase/Plugins/`: + +```bash +# example directory structure for running Metabase Mac App with Teradata support +/Users/you/Library/Application Support/Metabase/Plugins/teradata.metabase-driver.jar +/Users/you/Library/Application Support/Metabase/Plugins/tdgssconfig.jar +/Users/you/Library/Application Support/Metabase/Plugins/terajdbc4.jar +``` + +If you are running the Docker image or you want to use another directory for plugins, you should specify a custom plugins directory by setting the environment variable `MB_PLUGINS_DIR`. + + +## Building the Teradata Driver Yourself + +### Prereqs: Install Metabase locally, compiled for building drivers + +```bash +cd /path/to/metabase/source +lein install-for-building-drivers +``` + +### Build it + +```bash +cd /path/to/teradata-driver +lein clean +DEBUG=1 LEIN_SNAPSHOTS_IN_RELEASE=true lein uberjar +``` + +This will build a file called `target/uberjar/teradata.metabase-driver.jar`; copy this to your Metabase `./plugins` directory. 
\ No newline at end of file diff --git a/project.clj b/project.clj new file mode 100644 index 0000000..d561d41 --- /dev/null +++ b/project.clj @@ -0,0 +1,16 @@ +(defproject metabase/teradata-driver "1.0.0-teradata-jdbc-16.00" + :min-lein-version "2.5.0" + + :profiles + {:provided + {:dependencies + [[org.clojure/clojure "1.10.0"] + [metabase-core "1.0.0-SNAPSHOT"]]} + + :uberjar + {:auto-clean true + :aot :all + :omit-source true + :javac-options ["-target" "1.8", "-source" "1.8"] + :target-path "target/%s" + :uberjar-name "teradata.metabase-driver.jar"}}) ; same file name the README tells users to copy into the plugins directory diff --git a/resources/metabase-plugin.yaml b/resources/metabase-plugin.yaml new file mode 100644 index 0000000..ca560d9 --- /dev/null +++ b/resources/metabase-plugin.yaml @@ -0,0 +1,36 @@ +info: + name: Metabase Teradata Driver + version: 1.0.0-teradata-jdbc-16.00 + description: Allows Metabase to connect to Teradata databases. Community Supported driver. +dependencies: + - class: com.teradata.jdbc.TeraDriver + message: > + Metabase requires the Teradata terajdbc4.jar and tdgssconfig.jar in order to be able to connect to Teradata databases. +driver: + name: teradata + display-name: Teradata + lazy-load: true + parent: sql-jdbc + connection-properties: + - host + - merge: + - port + - default: 1025 # the driver puts the port into the JDBC URL as DBS_PORT + - name: dbnames # comma-separated; the driver uses one entry as the JDBC DATABASE and all entries as the schema inclusion list during sync + display-name: Database name(s) (case sensitive) + placeholder: Comma-separated list of database names + - name: tmode # the driver puts this into the JDBC URL as TMODE + display-name: Transaction mode + default: ANSI + - user + - password + - ssl # NOTE(review): the driver binds :ssl but never reads it; ENCRYPTDATA is controlled by :encrypt-data instead - confirm intent + - merge: + - additional-options + - placeholder: e.g. 
COPLAST=OFF + connection-properties-include-tunnel-config: true +init: + - step: load-namespace + namespace: metabase.driver.teradata + - step: register-jdbc-driver + class: com.teradata.jdbc.TeraDriver diff --git a/src/metabase/driver/sql/util/deduplicate.clj b/src/metabase/driver/sql/util/deduplicate.clj new file mode 100644 index 0000000..55d6756 --- /dev/null +++ b/src/metabase/driver/sql/util/deduplicate.clj @@ -0,0 +1,49 @@ +(ns metabase.driver.sql.util.deduplicate + "Utility function for de-duplication as used by Oracle and Teradata drivers. Extracted from Oracle driver" + (:require [clojure.string :as str])) + +(defn- increment-identifier-suffix + "Add an appropriate suffix to a keyword IDENTIFIER to make it distinct from previous usages of the same identifier, + e.g. + + (increment-identifier-suffix :my_col) ; -> :my_col_2 + (increment-identifier-suffix :my_col_2) ; -> :my_col_3" + [identifier] + (keyword + (let [identifier (name identifier)] + (if-let [[_ existing-suffix] (re-find #"^.*_(\d+$)" identifier)] + ;; if identifier already ends in a numeric suffix like col_2 then increment it to col_3 + (let [new-suffix (str (inc (Integer/parseInt existing-suffix)))] + (clojure.string/replace identifier (re-pattern (str existing-suffix \$)) new-suffix)) + ;; otherwise just stick a _2 on the end so it's col_2 + (str identifier "_2"))))) + +(defn- alias-everything + "Make sure all the columns in SELECT-CLAUSE are alias forms, e.g. `[:table.col :col]` instead of `:table.col`. + (This facilitates our deduplication logic.)" + [select-clause] + (for [col select-clause] + (if (sequential? col) + ;; if something's already an alias form like [:table.col :col] it is kept as-is + col + ;; otherwise if it's something like :table.col replace with [:table.col :col] + [col (keyword (last (clojure.string/split (name col) #"\.")))]))) + +(defn deduplicate-identifiers + "Make sure every column in SELECT-CLAUSE has a unique alias. 
+ This is done because Oracle can't figure out how to use a query + that produces duplicate columns in a subselect." + [select-clause] + (if (= select-clause [:*]) + ;; if we're doing `SELECT *` there's no way we can deduplicate anything so we're SOL, return as-is + select-clause + ;; otherwise we can actually deduplicate things + (loop [already-seen #{}, acc [], [[col alias] & more] (alias-everything select-clause)] + (cond + ;; if no more cols are left to deduplicate, we're done + (not col) acc + ;; otherwise if we've already used this alias, replace it with one like `identifier_2` and try again + (contains? already-seen alias) (recur already-seen acc (cons [col (increment-identifier-suffix alias)] + more)) + ;; otherwise if we haven't seen it record it as seen and move on to the next column + :else (recur (conj already-seen alias) (conj acc [col alias]) more))))) \ No newline at end of file diff --git a/src/metabase/driver/teradata.clj b/src/metabase/driver/teradata.clj new file mode 100644 index 0000000..3ffb33a --- /dev/null +++ b/src/metabase/driver/teradata.clj @@ -0,0 +1,238 @@ +(ns metabase.driver.teradata + (:require [clojure + [set :as set] + [string :as s]] + [clojure.java.jdbc :as jdbc] + [honeysql.core :as hsql] + [metabase + [driver :as driver] + [util :as u]] + [metabase.driver.sql-jdbc + [common :as sql-jdbc.common] + [connection :as sql-jdbc.conn] + [execute :as sql-jdbc.execute] + [sync :as sql-jdbc.sync]] + [metabase.driver.sql.query-processor :as sql.qp] + [metabase.driver.sql.util.deduplicate :as deduplicateutil] + [metabase.models.field :as field] + [metabase.query-processor.util :as qputil] + [metabase.util + [honeysql-extensions :as hx] + [ssh :as ssh]]) + (:import [java.sql DatabaseMetaData ResultSet])) + +(driver/register! 
:teradata, :parent :sql-jdbc) + +(defmethod sql-jdbc.sync/database-type->base-type :teradata [_ column-type] + ({:BIGINT :type/BigInteger + :BIGSERIAL :type/BigInteger + :BIT :type/* + :BLOB :type/* + :BOX :type/* + :CHAR :type/Text + :CLOB :type/Text + :BYTE :type/* + :BYTEINT :type/Integer + :DATE :type/Date + :DECIMAL :type/Decimal + :FLOAT :type/Float + :FLOAT4 :type/Float + :FLOAT8 :type/Float + :INTEGER :type/Integer + :INT :type/Integer + :INT2 :type/Integer + :INT4 :type/Integer + :INT8 :type/BigInteger + :INTERVAL :type/* ; time span + :JSON :type/Text + :LONGVARCHAR :type/Text ; Teradata extension + :LSEG :type/* + :MACADDR :type/Text + :MONEY :type/Decimal + :NUMERIC :type/Decimal + :PATH :type/* + :POINT :type/* + :REAL :type/Float + :SERIAL :type/Integer + :SERIAL2 :type/Integer + :SERIAL4 :type/Integer + :SERIAL8 :type/BigInteger + :SMALLINT :type/Integer + :SMALLSERIAL :type/Integer + :TIME :type/Time + :TIMETZ :type/Time + :TIMESTAMP :type/DateTime + :TIMESTAMPTZ :type/DateTime + (keyword "TIMESTAMP WITH TIME ZONE") :type/DateTime + :TSQUERY :type/* + :TSVECTOR :type/* + :TXID_SNAPSHOT :type/* + :UUID :type/UUID + :VARBIT :type/* + :VARBYTE :type/* ; byte array + :VARCHAR :type/Text + :XML :type/Text + (keyword "bit varying") :type/* + (keyword "character varying") :type/Text + (keyword "double precision") :type/Float + (keyword "time with time zone") :type/Time + (keyword "time without time zone") :type/Time + (keyword "timestamp with timezone") :type/DateTime + (keyword "timestamp without timezone") :type/DateTime}, column-type)) + +(defn- dbnames-set + "Transform the comma-separated string of database names into a set of trimmed, non-blank names. Returns nil when DBNAMES is nil and an empty set when it contains no names." + [dbnames] + (when dbnames + (set (remove s/blank? (map s/trim (s/split dbnames #",")))))) + +(defn- teradata-spec + "Create a database specification for a teradata database from the connection details OPTS. + :host, :port, :dbnames, :charset, :tmode and :encrypt-data are encoded in the JDBC URL; every key other than :host, :port, + :dbnames, :tmode and :charset (e.g. :user and :password) is also merged into the spec unchanged. Delimiters are automatically set to \"`\"." 
+ [{:keys [host port dbnames charset tmode encrypt-data] + :or {host "localhost", charset "UTF8", tmode "ANSI", encrypt-data true} + :as opts}] + (merge {:classname "com.teradata.jdbc.TeraDriver" + :subprotocol "teradata" + :subname (str "//" host "/" + (->> (merge + (when-let [db (some->> dbnames (re-seq #"[^,]+") (map s/trim) (remove s/blank?) first)] ; first *listed* non-blank name; taking `first` of a set would not respect the order the user typed + {"DATABASE" db}) + (when port + {"DBS_PORT" port}) + {"CHARSET" charset + "TMODE" tmode + "ENCRYPTDATA" (if encrypt-data "ON" "OFF") + "FINALIZE_AUTO_CLOSE" "ON" + ;; We don't need lob support in metabase. This also removes the limitation of 16 open statements per session which would interfere metadata crawling. + "LOB_SUPPORT" "OFF" + }) + (map #(format "%s=%s" (first %) (second %))) + (s/join ","))) + :delimiters "`"} + (dissoc opts :host :port :dbnames :tmode :charset))) + +(defmethod sql-jdbc.conn/connection-details->spec :teradata + [_ {ssl? :ssl, :as details-map}] + (-> details-map + teradata-spec + (sql-jdbc.common/handle-additional-options details-map, :seperator-style :comma))) + +;; trunc always returns a date in Teradata +(defn- date-trunc [unit expr] (hsql/call :trunc expr (hx/literal unit))) + +(defn- timestamp-trunc [unit expr] (hsql/call :to_timestamp + (hsql/call :to_char + expr + unit) unit)) + +(defn- extract [unit expr] (hsql/call :extract unit expr)) + +(def ^:private extract-integer (comp hx/->integer extract)) + +(def ^:private ^:const one-day (hsql/raw "INTERVAL '1' DAY")) + +(def ^:private ^:const now (hsql/raw "CURRENT_TIMESTAMP")) + +(defmethod sql.qp/date [:teradata :default] [_ _ expr] (some-> expr hx/->date)) +(defmethod sql.qp/date [:teradata :minute] [_ _ expr] (timestamp-trunc "yyyy-mm-dd hh24:mi" expr)) +(defmethod sql.qp/date [:teradata :minute-of-hour] [_ _ expr] (extract-integer :minute expr)) +(defmethod sql.qp/date [:teradata :hour] [_ _ expr] (timestamp-trunc "yyyy-mm-dd hh24" expr)) +(defmethod sql.qp/date [:teradata :hour-of-day] [_ _ expr] (extract-integer :hour expr)) +(defmethod sql.qp/date [:teradata 
:day] [_ _ expr] (hx/->date expr)) +(defmethod sql.qp/date [:teradata :day-of-week] [driver _ expr] (hx/inc (hx/- (sql.qp/date driver :day expr) + (sql.qp/date driver :week expr)))) ; the multimethod takes (driver unit expr), so the driver must be passed on +(defmethod sql.qp/date [:teradata :day-of-month] [_ _ expr] (extract-integer :day expr)) +(defmethod sql.qp/date [:teradata :day-of-year] [driver _ expr] (hx/inc (hx/- (sql.qp/date driver :day expr) (date-trunc :year expr)))) +(defmethod sql.qp/date [:teradata :week] [_ _ expr] (date-trunc :day expr)) ; Same behaviour as with Oracle. +(defmethod sql.qp/date [:teradata :week-of-year] [_ _ expr] (hx/inc (hx// (hx/- (date-trunc :iw expr) + (date-trunc :iy expr)) + 7))) +(defmethod sql.qp/date [:teradata :month] [_ _ expr] (date-trunc :mm expr)) +(defmethod sql.qp/date [:teradata :month-of-year] [_ _ expr] (extract-integer :month expr)) +(defmethod sql.qp/date [:teradata :quarter] [_ _ expr] (date-trunc :q expr)) +(defmethod sql.qp/date [:teradata :quarter-of-year] [driver _ expr] (hx// (hx/+ (sql.qp/date driver :month-of-year (sql.qp/date driver :quarter expr)) 2) 3)) +(defmethod sql.qp/date [:teradata :year] [_ _ expr] (extract-integer :year expr)) + +(defmethod sql.qp/unix-timestamp->timestamp [:teradata :seconds] [_ _ field-or-value] + (hsql/call :to_timestamp field-or-value)) + +(defmethod sql.qp/unix-timestamp->timestamp [:teradata :milliseconds] [driver _ field-or-value] + (sql.qp/unix-timestamp->timestamp driver :seconds (hx// field-or-value 1000))) ; argument order is (driver unit value), matching the :seconds method above + +(defmethod sql.qp/apply-top-level-clause [:teradata :limit] [_ _ honeysql-form {value :limit}] + (update (assoc honeysql-form :modifiers [(format "TOP %d" value)]) :select deduplicateutil/deduplicate-identifiers)) + +(defmethod sql.qp/apply-top-level-clause [:teradata :page] [_ _ honeysql-form {{:keys [items page]} :page}] + (assoc honeysql-form :offset (hsql/raw (format "QUALIFY ROW_NUMBER() OVER (%s) BETWEEN %d AND %d" + (first (hsql/format (select-keys honeysql-form [:order-by]) + :allow-dashed-names? 
true + :quoting :ansi)) + (inc (* items (dec page))) + (* items page))))) + +(def excluded-schemas + #{"SystemFe" "SYSLIB" "LockLogShredder" "Sys_Calendar" "SYSBAR" "SYSUIF" + "dbcmngr" "tdwm" "TDStats" "TDQCD" "SQLJ" "SysAdmin" "SYSSPATIAL" "DBC" "Crashdumps" "External_AP" "TDPUSER"}) + +(defmethod sql-jdbc.sync/excluded-schemas :teradata [_] + excluded-schemas) + +;; Teradata uses ByteInt with values `1`/`0` for boolean `TRUE`/`FALSE`. +(defmethod sql.qp/->honeysql [:teradata Boolean] + [_ bool] + (if bool 1 0)) + +(defn- get-tables + "Fetch the tables, views and foreign tables reported by the JDBC metadata as a seq of row maps (via `jdbc/result-set-seq`), optionally limited to ones belonging to a given schema." + [^DatabaseMetaData metadata, ^String schema-or-nil] + (jdbc/result-set-seq (.getTables metadata nil schema-or-nil "%" ; tablePattern "%" = match all tables + (into-array String ["TABLE", "VIEW", "FOREIGN TABLE"])))) + +(defn- fast-active-tables + "Teradata implementation of `fast-active-tables` with inclusion-list support. Returns a set of `{:name ... :schema ...}` maps for the tables in the schemas named by the `dbnames` detail, or, when no names are given, in every schema not listed in `excluded-schemas`." + [driver, ^DatabaseMetaData metadata, {{:keys [dbnames]} :details, :as database}] + (let [all-schemas (set (map :table_schem (jdbc/result-set-seq (.getSchemas metadata)))) + dbs (dbnames-set dbnames) + schemas (if (empty? dbs) + (set/difference all-schemas excluded-schemas) ; use default exclusion list + (set/intersection all-schemas dbs))] ; use defined inclusion list + (set (for [schema schemas + table-name (mapv :table_name (get-tables metadata schema))] + {:name table-name + :schema schema})))) + +;; Overridden to have access to the database with the configured property dbnames (inclusion list) +;; which will be used to filter the schemas. +(defmethod driver/describe-database :teradata + [driver database] + (jdbc/with-db-metadata [metadata (sql-jdbc.conn/db->pooled-connection-spec database)] ; with-db-metadata expects a db-spec here, not the driver keyword + {:tables (fast-active-tables, driver, ^DatabaseMetaData metadata, database)})) + +(defn- run-query + "Run the query itself without setting the timezone connection parameter as this must not be changed on a Teradata connection. 
+ Setting connection attributes like timezone would make subsequent queries behave unexpectedly." + [{sql :query, params :params, remark :remark} timezone connection] + (let [sql (s/replace (s/replace (str "-- " remark "\n" (hx/unescape-dots sql)) "OFFSET" "") "test_data" "test-data") ;; temporary hack: removes every literal OFFSET from the SQL text and rewrites test_data to test-data. NOTE(review): presumably needed because the :page clause stores its QUALIFY expression under :offset - confirm; this also rewrites matching text inside user SQL + statement (into [sql] params) + [columns & rows] (jdbc/query connection statement {:identifiers identity, :as-arrays? true + :read-columns (#'metabase.driver.sql-jdbc.execute/read-columns :teradata timezone)})] + {:rows (or rows []) + :columns columns})) + +(defn- run-query-without-timezone [driver settings connection query] ; `driver` and `settings` are accepted but not used; `run-query` is always given a nil timezone + (#'metabase.driver.sql-jdbc.execute/do-in-transaction connection (partial run-query query nil))) + +(defmethod driver/execute-query :teradata + [driver {:keys [database settings], query :native, :as outer-query}] + (let [query (assoc query :remark (qputil/query->remark outer-query))] + (#'metabase.driver.sql-jdbc.execute/do-with-try-catch + (fn [] + (let [db-connection (sql-jdbc.conn/db->pooled-connection-spec database)] + (run-query-without-timezone driver settings db-connection query)))))) + +(defmethod sql.qp/current-datetime-fn :teradata [_] now) + +; TODO check if overriding apply-top-level-clause could make nested queries work +(defmethod driver/supports? [:teradata :nested-queries] [_ _] false) \ No newline at end of file diff --git a/test/metabase/driver/teradata_test.clj b/test/metabase/driver/teradata_test.clj new file mode 100644 index 0000000..a019fd0 --- /dev/null +++ b/test/metabase/driver/teradata_test.clj @@ -0,0 +1,12 @@ +(ns metabase.driver.teradata-test + (:require [expectations :refer :all] + [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn])) + +;; Check that additional JDBC options are handled correctly. This is comma separated for Teradata. 
+(expect + {:classname "com.teradata.jdbc.TeraDriver" + :subprotocol "teradata" + :subname "//localhost/CHARSET=UTF8,TMODE=ANSI,ENCRYPTDATA=ON,FINALIZE_AUTO_CLOSE=ON,LOB_SUPPORT=OFF,COP=OFF" + :delimiters "`"} + (-> (sql-jdbc.conn/connection-details->spec :teradata {:host "localhost" + :additional-options "COP=OFF"}))) \ No newline at end of file diff --git a/test/metabase/test/data/teradata.clj b/test/metabase/test/data/teradata.clj new file mode 100644 index 0000000..90235e8 --- /dev/null +++ b/test/metabase/test/data/teradata.clj @@ -0,0 +1,60 @@ +(ns metabase.test.data.teradata + (:require [clojure.java.jdbc :as jdbc] + [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] + [metabase.test.data + [interface :as tx] + [sql :as sql.tx] + [sql-jdbc :as sql-jdbc.tx]] + [metabase.test.data.sql-jdbc + [execute :as execute] + [load-data :as load-data]] + [metabase.util :as u])) + +(sql-jdbc.tx/add-test-extensions! :teradata) + +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/BigInteger] [_ _] "BIGINT") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Boolean] [_ _] "BYTEINT") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Date] [_ _] "DATE") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/DateTime] [_ _] "TIMESTAMP") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Decimal] [_ _] "DECIMAL") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Float] [_ _] "FLOAT") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Integer] [_ _] "INTEGER") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Text] [_ _] "VARCHAR2(2048)") +(defmethod sql.tx/field-base-type->sql-type [:teradata :type/Time] [_ _] "TIME") + +;; Tested using Teradata Express VM image. Set the host to the correct address if localhost does not work. 
+(def ^:private connection-details + (delay ; wrapped in `delay`, so the lookups below run on first deref rather than when the namespace loads + {:host (tx/db-test-env-var-or-throw :teradata :host "localhost") + :user (tx/db-test-env-var-or-throw :teradata :user "dbc") + :password (tx/db-test-env-var-or-throw :teradata :password "dbc")})) + +(defmethod tx/dbdef->connection-details :teradata [& _] @connection-details) + +(defmethod sql.tx/drop-table-if-exists-sql :teradata [_ {:keys [database-name]} {:keys [table-name]}] + (format "DROP TABLE \"%s\".\"%s\"⅋" database-name table-name)) ; NOTE(review): the trailing `⅋` is presumably the statement-separator escape understood by `sequentially-execute-sql!` - confirm before changing it + +(defmethod sql.tx/create-db-sql :teradata [_ {:keys [database-name]}] + (format "CREATE user \"%s\" AS password=\"%s\" perm=524288000 spool=524288000;" database-name database-name)) ; each test database is created as a user whose password equals its name + +(defmethod sql.tx/drop-db-if-exists-sql :teradata [_ {:keys [database-name]}] + (format "DELETE user \"%s\" ALL; DROP user \"%s\";" database-name database-name)) + +(defmethod sql.tx/qualified-name-components :teradata + ([_ db-name] [db-name]) + ([_ db-name table-name] [db-name table-name]) + ([_ db-name table-name field-name] [db-name table-name field-name])) + +(defn- dbspec [& _] ; NOTE(review): not referenced anywhere in the visible part of this file + (sql-jdbc.conn/connection-details->spec :teradata @connection-details)) + +;; TODO override execute to be able to suppress db/table does not exist error. + +(defmethod execute/execute-sql! :teradata [& args] + (apply execute/sequentially-execute-sql! args)) + +(defmethod load-data/load-data! :teradata [& args] + (apply load-data/load-data-one-at-a-time! args)) + +(defmethod sql.tx/pk-sql-type :teradata [_] + "INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1 MINVALUE -2147483647 MAXVALUE 2147483647 NO CYCLE)")