26 changes: 13 additions & 13 deletions Gemfile.lock
@@ -1,8 +1,8 @@
GIT
remote: https://github.com/boost/codeclimate_diff.git
revision: 8974e206e994dbd168a46de24faaabfea8503c8e
revision: ee2cece9fb5baffb8d9a367c2ffa41e1266a3c32
specs:
codeclimate_diff (0.1.13)
codeclimate_diff (0.1.14)
colorize
json
optparse
@@ -164,7 +164,7 @@ GEM
debug_inspector (>= 1.2.0)
bson (5.0.0)
builder (3.3.0)
byebug (11.1.3)
byebug (12.0.0)
case_transform (0.2)
activesupport
childprocess (5.0.0)
@@ -221,7 +221,7 @@ GEM
hash-deep-merge (0.1.1)
htmlentities (4.3.4)
http-accept (1.7.0)
http-cookie (1.0.7)
http-cookie (1.1.0)
domain_name (~> 0.5)
i18n (1.14.7)
concurrent-ruby (~> 1.0)
@@ -230,7 +230,7 @@ GEM
pp (>= 0.6.0)
rdoc (>= 4.0.0)
reline (>= 0.4.2)
json (2.7.2)
json (2.16.0)
json-canonicalization (1.0.0)
json-ld (3.3.1)
htmlentities (~> 4.3)
@@ -278,10 +278,10 @@ GEM
net-smtp
marcel (1.0.4)
method_source (1.1.0)
mime-types (3.6.0)
mime-types (3.7.0)
logger
mime-types-data (~> 3.2015)
mime-types-data (3.2024.1001)
mime-types-data (~> 3.2025, >= 3.2025.0507)
mime-types-data (3.2025.0924)
mini_mime (1.1.5)
minitest (5.25.5)
mongo (2.20.0)
@@ -312,7 +312,7 @@ GEM
racc (~> 1.4)
nokogiri (1.18.9-x86_64-linux-gnu)
racc (~> 1.4)
optparse (0.5.0)
optparse (0.8.0)
orm_adapter (0.5.0)
parallel (1.25.1)
parser (3.3.4.0)
@@ -323,12 +323,12 @@ GEM
pr_geohash (1.0.0)
prettyprint (0.2.0)
progressbar (1.13.0)
pry (0.14.2)
pry (0.15.2)
coderay (~> 1.1)
method_source (~> 1.0)
pry-byebug (3.10.1)
byebug (~> 11.0)
pry (>= 0.13, < 0.15)
pry-byebug (3.11.0)
byebug (~> 12.0)
pry (>= 0.13, < 0.16)
pry-rails (0.3.11)
pry (>= 0.13.0)
psych (5.2.6)
57 changes: 35 additions & 22 deletions app/models/supplejack_api/collection_metric.rb
@@ -6,6 +6,7 @@ class CollectionMetric
include Mongoid::Document
include Mongoid::Timestamps
include SupplejackApi::Concerns::QueryableByDate
include SupplejackApi::Concerns::MetricHelpers

field :d, as: :date, type: Date, default: Time.now.utc
field :dc, as: :display_collection, type: String
@@ -31,49 +31,56 @@ class CollectionMetric
)
end

def self.spawn(date_range = (30.days.ago.utc..Time.zone.now.yesterday.beginning_of_day))
def self.spawn(date_range = (Time.zone.at(0).utc..Time.now.yesterday.utc.beginning_of_day))
return unless SupplejackApi.config.log_metrics == true

dates = SupplejackApi::RecordMetric.where(date: date_range).map(&:date).uniq
dates.each do |date|
Rails.logger.info("COLLECTION METRICS: Processing date: #{date}")
collections = SupplejackApi::RecordMetric.where(date:).pluck(:display_collection).uniq
record_metrics_dates_between(date_range).each do |date|
logger.info("COLLECTION METRIC: Processing date: #{date}")
display_collections = SupplejackApi::RecordMetric
.where(date:, processed_by_collection_metrics: false)
.distinct(:display_collection)

collections.each do |collection|
Rails.logger.info("COLLECTION METRICS: Processing collection: #{collection}")
record_metrics = record_metrics_to_be_processed(date, collection)
collection_metrics = find_or_create_by(date:, display_collection: collection).inc(
searches: record_metrics.sum(:appeared_in_searches),
record_page_views: record_metrics.sum(:page_views),
user_set_views: record_metrics.sum(:user_set_views),
user_story_views: record_metrics.sum(:user_story_views),
records_added_to_user_sets: record_metrics.sum(:added_to_user_sets),
records_added_to_user_stories: record_metrics.sum(:added_to_user_stories),
total_source_clickthroughs: record_metrics.sum(:source_clickthroughs)
)
display_collections.each do |display_collection|
logger.info("COLLECTION METRIC: Processing collection: #{display_collection}")
record_metrics = record_metrics_to_be_processed(date, display_collection)

if collection_metrics.save
if update_collection_metrics(record_metrics, date, display_collection)
record_metrics.update_all(processed_by_collection_metrics: true)
else
Rails.logger.error "Unable to summarize record metrics from collection: #{collection} date: #{date}"
logger.error "Unable to summarize record metrics from collection: #{collection} date: #{date}"
end
end
regenerate_all_collection_metrics!(date)
end
end

def self.update_collection_metrics(record_metrics, date, display_collection)
collection_metrics = find_or_create_by(date:, display_collection:).inc(
searches: record_metrics.sum(:appeared_in_searches),
record_page_views: record_metrics.sum(:page_views),
user_set_views: record_metrics.sum(:user_set_views),
user_story_views: record_metrics.sum(:user_story_views),
records_added_to_user_sets: record_metrics.sum(:added_to_user_sets),
records_added_to_user_stories: record_metrics.sum(:added_to_user_stories),
total_source_clickthroughs: record_metrics.sum(:source_clickthroughs)
)

collection_metrics.save
end

def self.record_metrics_to_be_processed(date, display_collection)
Rails.logger.info("COLLECTION METRICS: Gathering records to be processed: #{date} #{display_collection}")
logger.info("COLLECTION METRIC: Gathering records to be processed: #{date} #{display_collection}")
SupplejackApi::RecordMetric.where(
date:,
display_collection:,
:processed_by_collection_metrics.in => [nil, '', false]
processed_by_collection_metrics: false
)
end

def self.regenerate_all_collection_metrics!(date)
Rails.logger.info("COLLECTION METRICS: Regenerate all collection metrics #{date}")
logger.info("COLLECTION METRIC: Regenerate all collection metrics #{date}")
delete_all(date:, display_collection: 'all')
logger.info('COLLECTION METRIC: deleted_all')
all_collections = new(date:, display_collection: 'all')
where(date:, :display_collection.nin => ['all']).find_all do |collection|
all_collections.inc(
@@ -86,6 +94,11 @@ def self.regenerate_all_collection_metrics!(date)
total_source_clickthroughs: collection.total_source_clickthroughs
).save!
end
logger.info('COLLECTION METRIC: saved')
end

def self.record_metrics_dates_between(date_range)
record_metrics_dates_between_for(:processed_by_collection_metrics, date_range)
end
end
end
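Note on the new default range: `spawn` now sweeps every date with unprocessed record metrics back to the Unix epoch instead of only the last 30 days, and it still returns early unless `SupplejackApi.config.log_metrics` is true. A minimal usage sketch (the explicit one-week range below is illustrative, not something this PR adds):

```ruby
# Roll up all unprocessed record metrics (new default range starts at the epoch):
SupplejackApi::CollectionMetric.spawn

# Or pass an explicit window, e.g. the previous week (illustrative only):
SupplejackApi::CollectionMetric.spawn(7.days.ago.utc..Time.now.yesterday.utc.beginning_of_day)
```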
37 changes: 37 additions & 0 deletions app/models/supplejack_api/concerns/metric_helpers.rb
@@ -0,0 +1,37 @@
# frozen_string_literal: true

module SupplejackApi
module Concerns
module MetricHelpers
extend ActiveSupport::Concern

module ClassMethods
# produce a logging prefix matching the original style in the model files
# e.g. "TopMetric" -> "TOP METRIC", "TopCollectionMetric" -> "TOP COLLECTION METRIC"
def log_prefix
klass = name.to_s.split('::').last
klass.gsub(/([a-z\d])([A-Z])/, '\1 \2').tr('_', ' ').upcase
end

# Fetch distinct dates for RecordMetric where the given processed flag is false
def record_metrics_dates_between_for(processed_field, date_range)
logger.info("#{log_prefix}: Fetching dates for #{processed_field}")
dates = SupplejackApi::RecordMetric
.where(date: date_range, processed_field => false)
.distinct(:date)
logger.info("#{log_prefix}: Processing dates: #{dates}")
dates
end

# Mark all RecordMetric rows for a given date as processed using the given flag
def stamp_record_metrics_for(processed_field, date)
logger.info("#{log_prefix}: Stamping all records on #{date} for #{processed_field}")
SupplejackApi::RecordMetric
.where(date:, processed_field => false)
.update_all(processed_field => true)
logger.info("#{log_prefix}: Stamped all records on: #{date} for #{processed_field}")
end
end
end
end
end
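The `log_prefix` helper derives each model's log tag from its class name. A small sketch of the same transformation in plain Ruby (the wrapper function and sample class names are illustrative):

```ruby
# Mirrors the split/gsub/tr/upcase chain used by MetricHelpers.log_prefix.
def log_prefix_for(class_name)
  klass = class_name.split('::').last
  klass.gsub(/([a-z\d])([A-Z])/, '\1 \2').tr('_', ' ').upcase
end

log_prefix_for('SupplejackApi::CollectionMetric')    # => "COLLECTION METRIC"
log_prefix_for('SupplejackApi::TopCollectionMetric') # => "TOP COLLECTION METRIC"
```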
38 changes: 34 additions & 4 deletions app/models/supplejack_api/record_metric.rb
@@ -24,14 +24,18 @@ class RecordMetric

index({ record_id: 1, display_collection: 1, date: 1 }, background: true)

index({ display_collection: 1, date: 1, processed_by_collection_metrics: 1 }, background: true)
index({ display_collection: 1, date: 1, processed_by_top_metrics: 1 }, background: true)
index({ display_collection: 1, date: 1, processed_by_top_collection_metrics: 1 }, background: true)
index({ date: 1, display_collection: 1, processed_by_collection_metrics: 1 }, background: true)
index({ date: 1, display_collection: 1, processed_by_top_metrics: 1 }, background: true)
index({ date: 1, display_collection: 1, processed_by_top_collection_metrics: 1 }, background: true)

index({ display_collection: 1, date: 1 }, background: true)

index({ date: 1 }, background: true)

index({ date: 1, processed_by_collection_metrics: 1 }, background: true)
index({ date: 1, processed_by_top_metrics: 1 }, background: true)
index({ date: 1, processed_by_top_collection_metrics: 1 }, background: true)

index({ processed_by_collection_metrics: 1 }, background: true)
index({ processed_by_top_metrics: 1 }, background: true)
index({ processed_by_top_collection_metrics: 1 }, background: true)
@@ -41,6 +45,7 @@ class RecordMetric
processed_by_top_metrics: 1,
processed_by_top_collection_metrics: 1
},
name: 'all_metrics',
background: true
)

@@ -50,9 +55,34 @@ def self.spawn(record_id, metrics, display_collection, date = Time.now.utc.begin

collection.update_one(
{ record_id:, date: date.to_date, display_collection: },
{ '$inc' => metrics },
{
'$setOnInsert' => {
processed_by_collection_metrics: false,
processed_by_top_metrics: false,
processed_by_top_collection_metrics: false
},
'$inc' => metrics
},
upsert: true
)
end

# this method deletes processed metrics in batches to avoid memory issues
# and excessive load on the db
def self.delete_all_processed_metrics(batch_size = 5_000, sleep_time = 0.05)
scope = SupplejackApi::RecordMetric.where(
processed_by_collection_metrics: true,
processed_by_top_metrics: true,
processed_by_top_collection_metrics: true
)

loop do
ids = scope.only(:_id).limit(batch_size).pluck(:id)
break if ids.empty?

SupplejackApi::RecordMetric.where(:_id.in => ids).delete_all
sleep sleep_time
end
end
end
end
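The batched delete is intended to be safe to run against a live database. A sketch of how it might be wired up, assuming a one-off rake task (the task name, batch size, and sleep interval below are illustrative and not part of this diff):

```ruby
# lib/tasks/record_metrics.rake (hypothetical wrapper around the new method)
namespace :supplejack_api do
  desc 'Delete record metrics already processed by all three summary jobs'
  task delete_processed_record_metrics: :environment do
    # Smaller batches and a longer pause reduce pressure on MongoDB even further.
    SupplejackApi::RecordMetric.delete_all_processed_metrics(1_000, 0.1)
  end
end
```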
6 changes: 4 additions & 2 deletions app/models/supplejack_api/request_metric.rb
@@ -49,8 +49,10 @@ def self.summarize

metrics.each do |metric|
metric.records.each do |record|
summary[date][record['record_id']]['metrics'][metric.metric] += 1
summary[date][record['record_id']]['display_collection'] = record['display_collection']
record_id = record['record_id']
entry = summary[date][record_id]
entry['metrics'][metric.metric] += 1
entry['display_collection'] = record['display_collection']
end
end
end
32 changes: 18 additions & 14 deletions app/models/supplejack_api/top_collection_metric.rb
@@ -6,6 +6,7 @@ class TopCollectionMetric
include Mongoid::Document
include Mongoid::Timestamps
include SupplejackApi::Concerns::QueryableByDate
include SupplejackApi::Concerns::MetricHelpers

METRICS = %i[
page_views
@@ -33,10 +34,7 @@ def self.spawn(date_range = (Time.zone.at(0).utc..Time.now.yesterday.utc.beginni

metrics = []

dates = SupplejackApi::RecordMetric.where(date: date_range).map(&:date).uniq
Rails.logger.info("TOP COLLECTION METRIC: processing dates: #{dates}")

dates.each do |date|
record_metrics_dates_between(date_range).each do |date|
display_collections(date).each do |dc|
METRICS.each do |metric|
record_metrics = record_metrics_to_be_processed(date, metric, dc)
@@ -52,18 +50,18 @@ def self.spawn(date_range = (Time.zone.at(0).utc..Time.now.yesterday.utc.beginni
metrics.push(top_collection_metric)
end
end
Rails.logger.info("TOP COLLECTION METRIC: Stampping all records on #{date}")

stamp_record_metrics(date)
end

metrics
end

def self.display_collections(date)
Rails.logger.info("TOP COLLECTION METRIC: Finding all display collections on #{date}")
logger.info("TOP COLLECTION METRIC: Finding all display collections on #{date}")
SupplejackApi::RecordMetric.where(
date:,
:processed_by_top_collection_metrics.in => [nil, '', false]
processed_by_top_collection_metrics: false
).map(&:display_collection).uniq
end

@@ -75,11 +73,13 @@ def self.calculate_results(record_metrics, metric)
end

def self.update_top_collection_metric(top_collection_metric, results)
if top_collection_metric.results.blank?
existing_results = top_collection_metric.results

if existing_results.blank?
top_collection_metric.update(results:)
else
merged_results = top_collection_metric.results.merge(results) { |_key, a, b| a + b }
merged_results = merged_results.sort_by { |_k, v| -v }.first(200).to_h
merged_results = existing_results.merge(results) { |_key, existing, incoming| existing + incoming }
merged_results = merged_results.sort_by { |_k, value| -value }.first(200).to_h

top_collection_metric.update(results: merged_results)
end
@@ -96,18 +96,22 @@ def self.find_or_create_top_collection_metric(date, metric, display_collection)
end

def self.record_metrics_to_be_processed(date, metric, display_collection)
Rails.logger.info("TOP COLLECTION METRIC: Gathering top 200 records to be
processed #{date}, #{metric}, #{display_collection}")
logger.info('TOP COLLECTION METRIC: ' \
"Gathering top 200 records to be processed #{date}, #{metric}, #{display_collection}")
SupplejackApi::RecordMetric.where(
date:,
metric.ne => 0,
display_collection:,
:processed_by_top_collection_metrics.in => [nil, '', false]
processed_by_top_collection_metrics: false
).order_by(metric => 'desc').limit(200)
end

def self.record_metrics_dates_between(date_range)
record_metrics_dates_between_for(:processed_by_top_collection_metrics, date_range)
end

def self.stamp_record_metrics(date)
SupplejackApi::RecordMetric.where(date:).update_all(processed_by_top_collection_metrics: true)
stamp_record_metrics_for(:processed_by_top_collection_metrics, date)
end
end
end
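For reference, `update_top_collection_metric` sums counts per record id when merging new results into existing ones, then keeps only the 200 largest entries. A standalone illustration with made-up record ids:

```ruby
existing = { 'rec-1' => 10, 'rec-2' => 4 }
incoming = { 'rec-2' => 6, 'rec-3' => 1 }

merged = existing.merge(incoming) { |_key, a, b| a + b }
# => { 'rec-1' => 10, 'rec-2' => 10, 'rec-3' => 1 }

top_200 = merged.sort_by { |_key, value| -value }.first(200).to_h
# With fewer than 200 keys everything is kept, ordered by descending count.
```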