diff --git a/app/models/bot/smooch.rb b/app/models/bot/smooch.rb
index 0ce5f4ae8..3364f0027 100644
--- a/app/models/bot/smooch.rb
+++ b/app/models/bot/smooch.rb
@@ -838,7 +838,7 @@ def self.save_text_message(message)
     # strip and remove null bytes
     claim = self.extract_claim(text).gsub(/\s+/, ' ').strip.gsub("\u0000", "\\u0000")
     extra = { quote: claim }
-    pm = ProjectMedia.joins(:media).where('trim(lower(quote)) = ?', claim.downcase).where('project_medias.team_id' => team.id).last
+    pm = ProjectMedia.joins(:media).where('medias.quote_hash' => Claim.generate_hash(claim)).where('project_medias.team_id' => team.id).last
     # Don't create a new text media if it's an unconfirmed request with just a few words
     if pm.nil? && message['archived'] == CheckArchivedFlags::FlagCodes::UNCONFIRMED && ::Bot::Alegre.get_number_of_words(claim) < self.min_number_of_words_for_tipline_long_text
       return team
diff --git a/app/models/claim.rb b/app/models/claim.rb
index cca39a140..ffa68a7d6 100644
--- a/app/models/claim.rb
+++ b/app/models/claim.rb
@@ -12,6 +12,12 @@ def media_type
     'quote'
   end
 
+  # MD5 hex digest of the claim text after `to_s`, `strip` and `downcase` (so nil hashes like '').
+  # This is the value written to `medias.quote_hash` and used for equality lookups on that column.
+  def self.generate_hash(claim)
+    Digest::MD5.hexdigest(claim.to_s.strip.downcase)
+  end
+
   private
 
   def remove_null_bytes
@@ -19,8 +25,10 @@ def remove_null_bytes
   end
 
   def set_uuid
-    uuid = Claim.where('lower(quote) = ?', self.quote.to_s.strip.downcase).joins("INNER JOIN project_medias pm ON pm.media_id = medias.id").first&.id
+    # Share the id of the first claim with the same quote hash that is linked to a project media; otherwise use our own id
+    hash_value = Claim.generate_hash(self.quote)
+    uuid = Claim.where(quote_hash: hash_value).joins("INNER JOIN project_medias pm ON pm.media_id = medias.id").first&.id
     uuid ||= self.id
-    self.update_column(:uuid, uuid)
+    self.update_columns(uuid: uuid, quote_hash: hash_value)
   end
 end
diff --git a/db/migrate/20260123042611_add_quote_hash_to_media_table.rb b/db/migrate/20260123042611_add_quote_hash_to_media_table.rb
new file mode 100644
index 000000000..b25480f61
--- /dev/null
+++ b/db/migrate/20260123042611_add_quote_hash_to_media_table.rb
@@ -0,0 +1,6 @@
+class AddQuoteHashToMediaTable < ActiveRecord::Migration[6.1]
+  def change
+    add_column :medias, :quote_hash, :string
+    add_index :medias, :quote_hash
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 1635224cf..2d9ac469f 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2025_11_26_195433) do
+ActiveRecord::Schema.define(version: 2026_01_23_042611) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -474,8 +474,10 @@
     t.integer "uuid", default: 0, null: false
     t.text "original_claim"
     t.string "original_claim_hash"
+    t.string "quote_hash"
     t.index "lower((quote)::text)", name: "index_medias_on_lower_quote", where: "((type)::text = 'Claim'::text)", using: :hash
     t.index ["original_claim_hash"], name: "index_medias_on_original_claim_hash", unique: true
+    t.index ["quote_hash"], name: "index_medias_on_quote_hash"
     t.index ["url"], name: "index_medias_on_url", unique: true
   end
 
diff --git a/lib/tasks/migrate/20250817055243_remove_blank_media_items.rake b/lib/tasks/migrate/20250817055243_remove_blank_media_items.rake
index 0bc07a011..9d5993296 100644
--- a/lib/tasks/migrate/20250817055243_remove_blank_media_items.rake
+++ b/lib/tasks/migrate/20250817055243_remove_blank_media_items.rake
@@ -2,7 +2,9 @@
 namespace :check do
   namespace :migrate do
     def get_claim_uuid(id, quote)
-      uuid = Claim.where('lower(quote) = ?', quote.to_s.strip.downcase).joins("INNER JOIN project_medias pm ON pm.media_id = medias.id").first&.id
+      # Hash through Claim.generate_hash so this lookup uses the same normalisation as the stored quote_hash
+      hash_value = Claim.generate_hash(quote)
+      uuid = Claim.where(quote_hash: hash_value).joins("INNER JOIN project_medias pm ON pm.media_id = medias.id").first&.id
       uuid ||= id
     end
     # bundle exec rails check:migrate:migrate_published_and_unpublished_items
@@ -92,19 +94,38 @@ namespace :check do
       minutes = ((Time.now.to_i - started) / 60).to_i
       puts "[#{Time.now}] Done in #{minutes} minutes."
     end
+    # rake task to set quote_hash for Claims
+    # bundle exec rails check:migrate:set_claim_quote_hash
+    task set_claim_quote_hash: :environment do
+      started = Time.now.to_i
+      # Resume after the last claim id checkpointed in the Rails cache (0 when no checkpoint exists)
+      last_claim_id = Rails.cache.read('check:migrate:set_claim_quote_hash') || 0
+      Claim.where('id > ?', last_claim_id)
+      .find_in_batches(batch_size: 2000) do |claims|
+        c_items = []
+        claims.each do |claim|
+          print '.'
+          claim.quote_hash = Claim.generate_hash(claim.quote)
+          c_items << claim.attributes
+        end
+        # One bulk write per batch, built from each claim's full attribute hash
+        Claim.upsert_all(c_items)
+        Rails.cache.write('check:migrate:set_claim_quote_hash', claims.pluck(:id).max)
+      end
+      minutes = ((Time.now.to_i - started) / 60).to_i
+      puts "[#{Time.now}] Done in #{minutes} minutes."
+    end
     # rake task to set Claim uuid
     # bundle exec rails check:migrate:set_claim_uuid
     task set_claim_uuid: :environment do
       started = Time.now.to_i
-      last_claim_id = Rails.cache.read('check:migrate:set_claim_uuid') || 0
-      Claim.where('id > ?', last_claim_id).where(uuid: 0)
+      Claim.where(uuid: 0)
       .find_in_batches(batch_size: 1000) do |claims|
         claims.each do |claim|
           print '.'
           uuid = get_claim_uuid(claim.id, claim.quote)
           claim.update_column(:uuid, uuid)
         end
-        Rails.cache.write('check:migrate:set_claim_uuid', claims.pluck(:id).max)
       end
       minutes = ((Time.now.to_i - started) / 60).to_i
       puts "[#{Time.now}] Done in #{minutes} minutes."