|
module Forklift
  module Connection
    # PostgreSQL connection adapter for Forklift, built on the `pg` gem.
    # Mirrors the interface of the MySQL adapter (read / write / read_since /
    # tables / columns / ...), issuing SQL through a memoized PG::Connection.
    class Pg < Forklift::Base::Connection
      # @param config [Hash] connection options handed straight to PG::Connection.new
      # @param forklift [Object] owning plan; provides #logger and #config
      # @raise [RuntimeError] when the `pg` gem is not installed
      def initialize(config, forklift)
        begin
          # Loaded lazily so the gem is only required when this adapter is used.
          require 'pg'
        rescue LoadError
          raise "To use the postgres connection you must add 'pg' to your Gemfile"
        end
        super(config, forklift)
      end

      # Establish (and memoize) the underlying PG::Connection.
      def connect
        @client ||= PG::Connection.new(config)
      end

      # Close the underlying connection.
      def disconnect
        client.close
      end

      # Column used by default to detect new/updated rows.
      def default_matcher
        'updated_at'
      end

      def drop!(table)
        q("DROP TABLE IF EXISTS #{quote_ident(table)}")
      end

      def rename(table, new_table)
        q("ALTER TABLE #{quote_ident(table)} RENAME TO #{quote_ident(new_table)}")
      end

      # Stream `query` results in pages of `limit` rows.
      # Yields each PG::Result page when a block is given; without a block the
      # first page is returned. `offset` is where pagination starts — it was
      # previously accepted but silently ignored; it is now honored (default 0
      # keeps the old behavior).
      def read(query, database=current_database, looping=true, limit=forklift.config[:batch_size], offset=0)
        loop do
          result = q([query, "LIMIT #{limit} OFFSET #{offset}"].join(' '))

          block_given? ? yield(result) : (return result)
          # Stop on the last (partial) page, or immediately when not looping.
          return result if result.num_tuples < limit || !looping
          offset += limit
        end
      end

      # Write `rows` (array of column=>value hashes) into `table`.
      # When `to_update` is true, rows carrying a primary key are DELETEd first
      # so the subsequent INSERT acts as an upsert.
      #
      # Fixes vs. the previous version:
      # * `columns` was called with no arguments (its signature requires the
      #   table name) — now called as columns(table, database).
      # * rows with a primary key were deleted but never re-inserted (they sat
      #   in the `else`-less branch), losing updated data — every row is now
      #   inserted, matching the MySQL adapter's delete-then-insert behavior.
      # * an empty row set produced invalid SQL (`INSERT ... VALUES `) — now a
      #   no-op.
      def write(rows, table, to_update=true, database=current_database, primary_key='id', lazy=true, crash_on_extral_col=false)
        if tables.include? table
          ensure_row_types(rows, table, database)
        elsif lazy && rows.length > 0
          lazy_table_create(table, rows, database, primary_key)
        end

        return if rows.empty? # nothing to write; avoids emitting invalid SQL

        table_columns = columns(table, database)

        insert_values = []
        delete_keys = []
        rows.each do |row|
          delete_keys << row[primary_key] if to_update && !row[primary_key].nil?
          insert_values << safe_values(table_columns, row)
        end

        unless delete_keys.empty?
          q(%{DELETE FROM #{quote_ident(table)} WHERE #{quote_ident(primary_key)} IN (#{delete_keys.join(',')})})
        end

        q(%{INSERT INTO #{quote_ident(table)} (#{safe_columns(table_columns)}) VALUES #{insert_values.join(',')}})
        forklift.logger.log "wrote #{rows.length} rows to `#{database}`.`#{table}`"
      end

      # @todo Create `table` on the fly from the shape of `data`.
      def lazy_table_create(table, data, database=current_database, primary_key='id', matcher=default_matcher)
        raise NotImplementedError.new
      end

      # @todo Map a Ruby value to a PostgreSQL column type.
      def sql_type(v)
        raise NotImplementedError.new
      end

      # Read rows whose `matcher` column is >= `since`, ordered ascending.
      # `since` is stringified before escaping: PG's escape_literal expects a
      # String and raises on e.g. Time objects.
      def read_since(table, since, matcher=default_matcher, database=current_database, limit=forklift.config[:batch_size])
        query = %{SELECT * FROM #{quote_ident(table)} WHERE #{quote_ident(matcher)} >= #{client.escape_literal(since.to_s)} ORDER BY #{quote_ident(matcher)} ASC}
        self.read(query, database, true, limit) do |rows|
          if block_given?
            yield rows
          else
            return rows
          end
        end
      end

      # Largest value of `matcher` in `table`, or the epoch when the table is
      # empty. NOTE: the alias was previously written AS 'matcher' — single
      # quotes denote a string literal in PostgreSQL, not an identifier, so the
      # query failed; double quotes are required.
      def max_timestamp(table, matcher=default_matcher)
        row = q(%{SELECT max(#{quote_ident(matcher)}) AS "matcher" FROM #{quote_ident(table)}}).first
        (row && row['matcher']) || Time.at(0)
      end

      # All table names in the `public` schema.
      def tables
        table_list = []
        read(%{SELECT table_name AS "table_name" FROM information_schema.tables WHERE table_schema = 'public'}) do |result|
          table_list << result.map{|r| r['table_name']}
        end
        table_list.flatten.compact
      end

      # Name of the database this connection is attached to.
      def current_database
        client.db
      end

      # Row count of `table`.
      def count(table)
        q(%{SELECT count(1) AS "count" FROM #{quote_ident(table)}})[0]['count'].to_i
      end

      def truncate!(table)
        q("TRUNCATE TABLE #{quote_ident(table)}")
      end

      # Best-effort truncate: failures (e.g. missing table) are logged, not
      # raised. Rescues StandardError rather than Exception so signals and
      # interpreter exits are not swallowed.
      def truncate(table)
        begin
          self.truncate!(table)
        rescue StandardError => e
          forklift.logger.debug e
        end
      end

      # Column names of `table` (or a name=>type Hash when `return_types`).
      # The table name is escaped with escape_literal — it was previously
      # interpolated raw into the WHERE clause.
      def columns(table, database=current_database, return_types=false)
        columns = {}
        read(%{SELECT column_name, data_type, character_maximum_length FROM "information_schema"."columns" WHERE table_name=#{client.escape_literal(table)}}) do |rows|
          rows.each do |row|
            type = case row['data_type']
                   when 'character varying' then "varchar(#{row['character_maximum_length']})"
                   else row['data_type']
                   end
            columns[row['column_name']] = type
          end
        end
        return_types ? columns : columns.keys
      end

      # @todo Dump the database to `file` (not yet implemented).
      def dump(file, options=[])
      end

      # @todo Execute the SQL script at `path` (not yet implemented).
      def exec_script(path)
      end

      # Log and execute a raw SQL statement, returning the PG::Result.
      def q(query, options={})
        forklift.logger.debug "\tSQL[#{config[:database]}]: #{query}"
        client.exec(query)
      end

      private

      # Add any columns present in `rows` but missing from `table`, typed via
      # #sql_type, re-reading the column list after each ALTER.
      def ensure_row_types(rows, table, database=current_database)
        existing = columns(table, database)
        rows.each do |row|
          row.each do |column, value|
            unless existing.include?(column)
              q(%{ALTER TABLE #{quote_ident(table)} ADD #{quote_ident(column)} #{sql_type(value)} NULL DEFAULT NULL})
              existing = columns(table, database)
            end
          end
        end
      end

      # Append a LIMIT/OFFSET clause for 1-indexed `page`.
      def paginate_query(query, page, page_size)
        offset = (page - 1) * page_size
        [query, "LIMIT #{page_size} OFFSET #{offset}"].join(' ')
      end

      # Double-quote an identifier safely for PostgreSQL.
      def quote_ident(table)
        PG::Connection.quote_ident(table)
      end
    end
  end
end
0 commit comments