Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/models/bot/smooch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ def self.save_text_message(message)
# strip and remove null bytes
claim = self.extract_claim(text).gsub(/\s+/, ' ').strip.gsub("\u0000", "\\u0000")
extra = { quote: claim }
pm = ProjectMedia.joins(:media).where('trim(lower(quote)) = ?', claim.downcase).where('project_medias.team_id' => team.id).last
pm = ProjectMedia.joins(:media).where('medias.quote_hash' => Claim.generate_hash(claim)).where('project_medias.team_id' => team.id).last
# Don't create a new text media if it's an unconfirmed request with just a few words
if pm.nil? && message['archived'] == CheckArchivedFlags::FlagCodes::UNCONFIRMED && ::Bot::Alegre.get_number_of_words(claim) < self.min_number_of_words_for_tipline_long_text
return team
Expand Down
9 changes: 7 additions & 2 deletions app/models/claim.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,20 @@ def media_type
'quote'
end

# Builds the lookup hash for a claim's text.
#
# The input is coerced to a string, stripped of leading/trailing whitespace
# and lower-cased before hashing, so values that differ only in case or
# surrounding whitespace produce the same digest. `nil` hashes like "".
#
# @param claim [#to_s] the claim text
# @return [String] MD5 hex digest of the normalized text
def self.generate_hash(claim)
  normalized = claim.to_s.strip.downcase
  Digest::MD5.hexdigest(normalized)
end

private

# Replaces every NUL character in `quote` with the literal text "\u0000"
# (backslash, "u", four zeros). Does nothing when `quote` is nil.
def remove_null_bytes
  return if self.quote.nil?

  self.quote = self.quote.gsub("\u0000", "\\u0000")
end

# Stores this claim's `uuid` and `quote_hash`.
#
# The uuid is the id of the first Claim that has the same quote hash and is
# joined to a project media; when no such claim exists, this record's own id
# is used instead.
def set_uuid
  hash_value = Claim.generate_hash(self.quote)
  # Single hash-based lookup; a separate lower(quote) query would be discarded
  # because its result is overwritten here.
  uuid = Claim.where(quote_hash: hash_value).joins("INNER JOIN project_medias pm ON pm.media_id = medias.id").first&.id
  uuid ||= self.id
  # Persist both columns in one write instead of writing uuid twice.
  self.update_columns(uuid: uuid, quote_hash: hash_value)
end
end
6 changes: 6 additions & 0 deletions db/migrate/20260123042611_add_quote_hash_to_media_table.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Adds a `quote_hash` string column to the `medias` table and indexes it.
class AddQuoteHashToMediaTable < ActiveRecord::Migration[6.1]
  def change
    change_table :medias do |t|
      t.string :quote_hash
      t.index :quote_hash
    end
  end
end
4 changes: 3 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2025_11_26_195433) do
ActiveRecord::Schema.define(version: 2026_01_23_042611) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand Down Expand Up @@ -474,8 +474,10 @@
t.integer "uuid", default: 0, null: false
t.text "original_claim"
t.string "original_claim_hash"
t.string "quote_hash"
t.index "lower((quote)::text)", name: "index_medias_on_lower_quote", where: "((type)::text = 'Claim'::text)", using: :hash
t.index ["original_claim_hash"], name: "index_medias_on_original_claim_hash", unique: true
t.index ["quote_hash"], name: "index_medias_on_quote_hash"
t.index ["url"], name: "index_medias_on_url", unique: true
end

Expand Down
26 changes: 22 additions & 4 deletions lib/tasks/migrate/20250817055243_remove_blank_media_items.rake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
namespace :check do
namespace :migrate do
# Resolves the uuid to store for a claim.
#
# Looks up the first Claim with the same quote hash that is joined to a
# project media and returns its id; falls back to the given `id` when no
# such claim is found.
#
# @param id [Integer] id of the claim being processed (fallback value)
# @param quote [String, nil] the claim's quote text
# @return [Integer] the id to use as uuid
def get_claim_uuid(id, quote)
  # Use the model's helper so this lookup hashes text exactly the way the
  # stored quote_hash values are produced.
  hash_value = Claim.generate_hash(quote)
  existing_id = Claim.where(quote_hash: hash_value).joins("INNER JOIN project_medias pm ON pm.media_id = medias.id").first&.id
  existing_id || id
end
# bundle exec rails check:migrate:migrate_published_and_unpublished_items
Expand Down Expand Up @@ -92,19 +93,36 @@ namespace :check do
minutes = ((Time.now.to_i - started) / 60).to_i
puts "[#{Time.now}] Done in #{minutes} minutes."
end
# rake task to set quote_hash for Claims
# bundle exec rails check:migrate:set_claim_quote_hash
task set_claim_quote_hash: :environment do
  # Backfills quote_hash on Claim rows in batches of 2000, printing one dot
  # per claim and the elapsed minutes at the end.
  started = Time.now.to_i
  cache_key = 'check:migrate:set_claim_quote_hash'
  # Resume after the highest id recorded by a previous run (0 on first run).
  last_claim_id = Rails.cache.read(cache_key) || 0
  Claim.where('id > ?', last_claim_id).find_in_batches(batch_size: 2000) do |claims|
    rows = claims.map do |claim|
      print '.'
      # Same helper the hash-based lookups use, so backfilled values match.
      claim.quote_hash = Claim.generate_hash(claim.quote)
      claim.attributes
    end
    Claim.upsert_all(rows)
    # Record progress so an interrupted run can pick up after this batch.
    Rails.cache.write(cache_key, claims.pluck(:id).max)
  end
  minutes = ((Time.now.to_i - started) / 60).to_i
  puts "[#{Time.now}] Done in #{minutes} minutes."
end
# rake task to set Claim uuid
# bundle exec rails check:migrate:set_claim_uuid
task set_claim_uuid: :environment do
started = Time.now.to_i
last_claim_id = Rails.cache.read('check:migrate:set_claim_uuid') || 0
Claim.where('id > ?', last_claim_id).where(uuid: 0)
Claim.where(uuid: 0)
.find_in_batches(batch_size: 1000) do |claims|
claims.each do |claim|
print '.'
uuid = get_claim_uuid(claim.id, claim.quote)
claim.update_column(:uuid, uuid)
end
Rails.cache.write('check:migrate:set_claim_uuid', claims.pluck(:id).max)
end
minutes = ((Time.now.to_i - started) / 60).to_i
puts "[#{Time.now}] Done in #{minutes} minutes."
Expand Down
Loading