diff --git a/app/api/entities/envelope_download.rb b/app/api/entities/envelope_download.rb index 724cd1f6..c69c2b5d 100644 --- a/app/api/entities/envelope_download.rb +++ b/app/api/entities/envelope_download.rb @@ -2,14 +2,24 @@ module API module Entities # Presenter for EnvelopeDownload class EnvelopeDownload < Grape::Entity - expose :id, - documentation: { type: 'string', desc: 'ID (in UUID format)' } + expose :display_status, as: :status, + documentation: { type: 'string', desc: 'Status of download' } - expose :status, - documentation: { type: 'string', desc: 'Status of download' } + expose :enqueued_at, + documentation: { type: 'string', desc: 'When the download was enqueued' }, + if: ->(object) { object.pending? } + + expose :finished_at, + documentation: { type: 'string', desc: 'When the download finished' }, + if: ->(object) { object.finished? } + + expose :started_at, + documentation: { type: 'string', desc: 'When the download started' }, + if: ->(object) { object.in_progress? } expose :url, - documentation: { type: 'string', desc: 'AWS S3 URL' } + documentation: { type: 'string', desc: 'AWS S3 URL' }, + if: ->(object) { object.finished? } end end end diff --git a/app/api/v1/envelopes.rb b/app/api/v1/envelopes.rb index 8c4e330a..cef605b7 100644 --- a/app/api/v1/envelopes.rb +++ b/app/api/v1/envelopes.rb @@ -11,6 +11,7 @@ require 'v1/single_envelope' require 'v1/revisions' require 'v1/envelope_events' +require 'download_envelopes_job' module API module V1 @@ -62,10 +63,36 @@ class Envelopes < MountableAPI type: params[:metadata_only] ? :metadata_only : :full end + include API::V1::EnvelopeEvents + desc 'Gives general info about the envelopes' get(:info) { envelopes_info } - include API::V1::EnvelopeEvents + resources :download do + before do + authenticate! + authorize Envelope, :index? + + @envelope_download = current_community.envelope_download || + current_community.create_envelope_download! 
+ end + + desc 'Returns the envelope download' + get do + present @envelope_download, with: API::Entities::EnvelopeDownload + end + + desc 'Starts an envelope download' + post do + @envelope_download.update!( + enqueued_at: Time.current, + status: :pending + ) + + DownloadEnvelopesJob.perform_later(@envelope_download.id) + present @envelope_download, with: API::Entities::EnvelopeDownload + end + end route_param :envelope_id do after_validation do @@ -86,28 +113,6 @@ class Envelopes < MountableAPI include API::V1::SingleEnvelope include API::V1::Revisions end - - resources :downloads do - before do - authenticate! - end - - desc 'Returns the download object with the given ID' - get ':id' do - authorize Envelope, :index? - - envelope_download = current_user_community.envelope_downloads.find(params[:id]) - present envelope_download, with: API::Entities::EnvelopeDownload - end - - desc 'Starts new envelope download' - post do - authorize Envelope, :index? - - present current_user_community.envelope_downloads.create!, - with: API::Entities::EnvelopeDownload - end - end end end end diff --git a/app/jobs/download_envelopes_job.rb b/app/jobs/download_envelopes_job.rb index 04ccd67c..b330c82d 100644 --- a/app/jobs/download_envelopes_job.rb +++ b/app/jobs/download_envelopes_job.rb @@ -1,4 +1,4 @@ -require 'entities/envelope' +require 'download_envelopes' require 'envelope_download' # Create a ZIP archive contaning all of the envelopes from a certain community, @@ -10,56 +10,9 @@ def perform(envelope_download_id) envelope_download = EnvelopeDownload.find_by(id: envelope_download_id) return unless envelope_download - envelope_download.update!( - internal_error_backtrace: [], - internal_error_message: nil, - started_at: Time.current - ) - - envelope_download.url = upload_to_s3(envelope_download) + DownloadEnvelopes.call(envelope_download:) rescue StandardError => e Airbrake.notify(e, envelope_download_id:) - envelope_download&.internal_error_backtrace = e.backtrace - 
envelope_download&.internal_error_message = e.message - ensure - envelope_download&.update!(finished_at: Time.current) - end - - private - - def bucket - ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET') - end - - def create_zip_archive(envelope_download) - envelopes = envelope_download.envelopes.includes( - :envelope_community, :organization, :publishing_organization - ) - - file_path = MR.root_path.join(SecureRandom.hex) - - Zip::OutputStream.open(file_path) do |stream| - envelopes.find_each do |envelope| - stream.put_next_entry("#{envelope.envelope_ceterms_ctid}.json") - stream.puts(API::Entities::Envelope.represent(envelope).to_json) - end - end - - file_path - end - - def region - ENV.fetch('AWS_REGION') - end - - def upload_to_s3(envelope_download) - community = envelope_download.envelope_community.name - key = "#{community}_#{Time.current.to_i}_#{SecureRandom.hex}.zip" - path = create_zip_archive(envelope_download) - object = Aws::S3::Resource.new(region:).bucket(bucket).object(key) - object.upload_file(path) - object.public_url - ensure - File.delete(path) + raise e end end diff --git a/app/models/envelope_community.rb b/app/models/envelope_community.rb index bd52696c..59e91aef 100644 --- a/app/models/envelope_community.rb +++ b/app/models/envelope_community.rb @@ -6,7 +6,7 @@ class EnvelopeCommunity < ActiveRecord::Base include AttributeNormalizer has_one :envelope_community_config - has_many :envelope_downloads + has_one :envelope_download has_many :envelopes has_many :envelope_resources, through: :envelopes has_many :indexed_envelope_resources diff --git a/app/models/envelope_download.rb b/app/models/envelope_download.rb index 872eca79..6b87dbe7 100644 --- a/app/models/envelope_download.rb +++ b/app/models/envelope_download.rb @@ -1,25 +1,17 @@ -require 'download_envelopes_job' - # Stores the status and AWS S3 URL of an asynchronously performed envelope download class EnvelopeDownload < ActiveRecord::Base belongs_to :envelope_community has_many :envelopes, -> { 
not_deleted }, through: :envelope_community - after_commit :enqueue_job, on: :create - - def status - if finished_at? - return internal_error_message? ? 'failed' : 'finished' - elsif started_at? - return 'in progress' - end - - 'pending' - end + enum :status, { + finished: 'finished', + in_progress: 'in_progress', + pending: 'pending' + } - private + def display_status + return 'failed' if internal_error_message? - def enqueue_job - DownloadEnvelopesJob.perform_later(id) + status end end diff --git a/app/models/extensions/ce_registry_resources.rb b/app/models/extensions/ce_registry_resources.rb index e52f1991..614b7b1c 100644 --- a/app/models/extensions/ce_registry_resources.rb +++ b/app/models/extensions/ce_registry_resources.rb @@ -22,7 +22,7 @@ def ce_registry? end def self.generate_ctid - "urn:ctid:#{SecureRandom.uuid}" + "ce-#{SecureRandom.uuid}" end end end diff --git a/app/services/download_envelopes.rb b/app/services/download_envelopes.rb new file mode 100644 index 00000000..5fdfb74f --- /dev/null +++ b/app/services/download_envelopes.rb @@ -0,0 +1,149 @@ +# Dumps an envelope community's envelopes into a ZIP archive and uploads it to S3 +class DownloadEnvelopes # rubocop:todo Metrics/ClassLength + attr_reader :envelope_download, :updated_at + + delegate :envelope_community, to: :envelope_download + + def initialize(envelope_download) + @envelope_download = envelope_download + @updated_at = envelope_download.started_at + end + + def self.call(envelope_download:) + new(envelope_download).run + end + + def bucket + ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET') + end + + def create_or_update_entries + FileUtils.mkdir_p(dirname) + + log('Adding recently published envelopes into the dump') + + published_envelopes.find_each do |envelope| + File.write( + File.join(dirname, "#{envelope.envelope_ceterms_ctid}.json"), + API::Entities::Envelope.represent(envelope).to_json + ) + end + end + + def dirname + @dirname ||= [ + envelope_community.name, + Time.current.to_i, + 
SecureRandom.hex + ].join('_') + end + + def download_file # rubocop:todo Metrics/AbcSize + return unless envelope_download.url? + + log("Downloading the existing dump from #{envelope_download.url}") + + File.open(filename, 'wb') do |file| + URI.parse(envelope_download.url).open do |data| + file.write(data.read) + end + end + + log("Unarchiving the downloaded dump into #{dirname}") + system("unzip -qq #{filename} -d #{dirname}", exception: true) + rescue StandardError => e + Airbrake.notify(e) + end + + def destroy_envelope_events + @deleted_envelope_ctids = envelope_community + .versions + .where(event: 'destroy') + .where('created_at >= ?', updated_at) + end + + def filename + @filename ||= "#{dirname}.zip" + end + + def log(message) + MR.logger.info(message) + end + + def published_envelopes + @published_envelopes = begin + envelopes = envelope_community + .envelopes + .not_deleted + .includes(:envelope_community, :organization, :publishing_organization) + + envelopes.where!('updated_at >= ?', updated_at) if updated_at + envelopes + end + end + + def region + ENV.fetch('AWS_REGION') + end + + def remove_entries + log('Removing recently deleted envelopes from the dump') + + destroy_envelope_events.select(:id, :envelope_ceterms_ctid).find_each do |event| + FileUtils.remove_file( + File.join(dirname, "#{event.envelope_ceterms_ctid}.json"), + true + ) + end + end + + def run # rubocop:todo Metrics/AbcSize, Metrics/MethodLength + envelope_download.update!( + internal_error_backtrace: [], + internal_error_message: nil, + started_at: Time.current, + status: :in_progress + ) + + envelope_download.with_lock do + if up_to_date? 
+ log('The dump is up to date.') + return + end + + download_file + create_or_update_entries + remove_entries + envelope_download.url = upload_file + rescue StandardError => e + Airbrake.notify(e) + envelope_download&.internal_error_backtrace = e.backtrace + envelope_download&.internal_error_message = e.message + ensure + log('Deleting intermediate files.') + FileUtils.rm_rf(dirname) + FileUtils.rm_f(filename) + envelope_download.update!(finished_at: Time.current, status: :finished) + log('Finished.') + end + end + + def up_to_date? + published_envelopes.none? && destroy_envelope_events.none? + end + + def upload_file + log('Archiving the updated dump.') + + system( + "find #{dirname} -type f -print | zip -FSjqq #{filename} -@", + exception: true + ) + + log('Uploading the updated dump to S3.') + + object = Aws::S3::Resource.new(region:).bucket(bucket).object(filename) + object.upload_file(filename) + object.public_url + end +end diff --git a/config/database.yml b/config/database.yml index 1b5db3fa..f6fc102d 100644 --- a/config/database.yml +++ b/config/database.yml @@ -5,7 +5,7 @@ default: &default database: <%= ENV['POSTGRESQL_DATABASE'] %> password: <%= ENV['POSTGRESQL_PASSWORD'] %> port: <%= ENV.fetch('POSTGRESQL_PORT', 5432) %> - pool: <%= ENV.fetch('SIDEKIQ_CONCURRENCY', 10) %> + pool: <%= ENV.fetch('SIDEKIQ_CONCURRENCY', 10).to_i + 1 %> encoding: utf8 development: diff --git a/db/migrate/20250830180848_add_unique_index_on_envelope_community_id_to_envelope_downloads.rb b/db/migrate/20250830180848_add_unique_index_on_envelope_community_id_to_envelope_downloads.rb new file mode 100644 index 00000000..a6b2fffd --- /dev/null +++ b/db/migrate/20250830180848_add_unique_index_on_envelope_community_id_to_envelope_downloads.rb @@ -0,0 +1,24 @@ +class AddUniqueIndexOnEnvelopeCommunityIdToEnvelopeDownloads < ActiveRecord::Migration[8.0] + def change + ActiveRecord::Base.transaction do + reversible do |dir| + dir.up do + ActiveRecord::Base.connection.execute(<<~COMMAND) 
+ DELETE FROM envelope_downloads + WHERE created_at NOT IN ( + SELECT max_created_at + FROM ( + SELECT MAX(created_at ) as max_created_at + FROM envelope_downloads + GROUP BY envelope_community_id + ) AS t + ); + COMMAND + end + end + + remove_index :envelope_downloads, :envelope_community_id + add_index :envelope_downloads, :envelope_community_id, unique: true + end + end +end diff --git a/db/migrate/20250922224518_add_status_to_envelope_downloads.rb b/db/migrate/20250922224518_add_status_to_envelope_downloads.rb new file mode 100644 index 00000000..86cd4ecb --- /dev/null +++ b/db/migrate/20250922224518_add_status_to_envelope_downloads.rb @@ -0,0 +1,5 @@ +class AddStatusToEnvelopeDownloads < ActiveRecord::Migration[8.0] + def change + add_column :envelope_downloads, :status, :string, default: 'pending', null: false + end +end diff --git a/db/migrate/20250925025616_add_enqueued_at_to_envelope_downloads.rb b/db/migrate/20250925025616_add_enqueued_at_to_envelope_downloads.rb new file mode 100644 index 00000000..1c20b5de --- /dev/null +++ b/db/migrate/20250925025616_add_enqueued_at_to_envelope_downloads.rb @@ -0,0 +1,5 @@ +class AddEnqueuedAtToEnvelopeDownloads < ActiveRecord::Migration[8.0] + def change + add_column :envelope_downloads, :enqueued_at, :datetime + end +end diff --git a/db/structure.sql b/db/structure.sql index ae1c3bff..13067ab7 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -324,7 +324,9 @@ CREATE TABLE public.envelope_downloads ( started_at timestamp(6) without time zone, url character varying, created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL + updated_at timestamp(6) without time zone NOT NULL, + status character varying DEFAULT 'pending'::character varying NOT NULL, + enqueued_at timestamp(6) without time zone ); @@ -1335,7 +1337,7 @@ CREATE INDEX index_envelope_community_configs_on_envelope_community_id ON public -- Name: index_envelope_downloads_on_envelope_community_id; Type: 
INDEX; Schema: public; Owner: - -- -CREATE INDEX index_envelope_downloads_on_envelope_community_id ON public.envelope_downloads USING btree (envelope_community_id); +CREATE UNIQUE INDEX index_envelope_downloads_on_envelope_community_id ON public.envelope_downloads USING btree (envelope_community_id); -- @@ -1887,8 +1889,11 @@ ALTER TABLE ONLY public.envelopes SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES +('20250925025616'), +('20250922224518'), ('20250921174021'), ('20250902034147'), +('20250830180848'), ('20250829235024'), ('20250818021420'), ('20250815032532'), diff --git a/docs/97_create_registry_containers.md b/docs/97_create_registry_containers.md index 2080b722..1bbb40f5 100644 --- a/docs/97_create_registry_containers.md +++ b/docs/97_create_registry_containers.md @@ -40,11 +40,13 @@ If not already built the Registry application image must be build: -t credentialregistry-app:latest \ --build-arg RUBY_VERSION=$(cat .ruby-version) ``` -**IMPORTANT NOTE:** The environment variable `SECRET_KEY_BASE` is used to sign cookies and must be provided at runtime (not at build time). Do NOT bake it into the image. Provide a consistent value via your orchestrator so all instances share the same secret. + + **IMPORTANT NOTE:** The environment variable `SECRET_KEY_BASE` is used to sign cookies and must be provided at runtime (not at build time). Do NOT bake it into the image. Provide a consistent value via your orchestrator so all instances share the same secret. 
Examples: - docker run + ```bash docker run -e SECRET_KEY_BASE=$(openssl rand -hex 64) -p 9292:9292 credentialregistry-app:latest ``` diff --git a/lib/swagger_docs/models.rb b/lib/swagger_docs/models.rb index c5fe6bf3..ab610210 100644 --- a/lib/swagger_docs/models.rb +++ b/lib/swagger_docs/models.rb @@ -374,10 +374,6 @@ module Models # rubocop:todo Metrics/ModuleLength, Style/Documentation end swagger_schema :EnvelopeDownload do - property :id, - type: :string, - description: 'ID' - property :status, type: :string, description: 'Status (pending, in progress, finished, or failed)' diff --git a/lib/swagger_docs/sections/envelopes.rb b/lib/swagger_docs/sections/envelopes.rb index 72249c48..32ca3e5e 100644 --- a/lib/swagger_docs/sections/envelopes.rb +++ b/lib/swagger_docs/sections/envelopes.rb @@ -59,37 +59,30 @@ module Envelopes # rubocop:todo Metrics/ModuleLength, Style/Documentation end end - swagger_path '/{community_name}/envelopes/downloads' do - operation :post do - key :operationId, 'postApiEnvelopesDownloads' - key :description, 'Starts new download' + swagger_path '/{community_name}/envelopes/download' do + operation :get do + key :operationId, 'getApiEnvelopesDownload' + key :description, "Returns the download's status and URL" key :produces, ['application/json'] key :tags, ['Envelopes'] parameter community_name - response 201 do + response 200 do key :description, 'Download object' schema { key :$ref, :EnvelopeDownload } end end - end - swagger_path '/{community_name}/envelopes/downloads/{id}' do - operation :get do - key :operationId, 'getApiEnvelopesDownloads' - key :description, "Returns download's status and URL" + operation :post do + key :operationId, 'postApiEnvelopesDownloads' + key :description, 'Starts a new download' key :produces, ['application/json'] key :tags, ['Envelopes'] parameter community_name - parameter name: :id, - in: :path, - type: :string, - required: true, - description: 'Download ID' - response 200 do + response 201 do key 
:description, 'Download object' schema { key :$ref, :EnvelopeDownload } end diff --git a/spec/api/v1/ce_registry_spec.rb b/spec/api/v1/ce_registry_spec.rb index 9bfb5656..8352d12b 100644 --- a/spec/api/v1/ce_registry_spec.rb +++ b/spec/api/v1/ce_registry_spec.rb @@ -4,7 +4,12 @@ before { get '/ce-registry/ctid' } it { expect_status(:ok) } - it { expect(json_resp['ctid']).to match(/urn:ctid:.*/) } + + it { + # rubocop:todo Layout/LineLength + expect(json_resp['ctid']).to match(/^ce-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/) + # rubocop:enable Layout/LineLength + } end context 'Other communities' do # rubocop:todo RSpec/ContextWording diff --git a/spec/api/v1/envelopes_spec.rb b/spec/api/v1/envelopes_spec.rb index 4f3752ef..b77d5ab5 100644 --- a/spec/api/v1/envelopes_spec.rb +++ b/spec/api/v1/envelopes_spec.rb @@ -145,28 +145,18 @@ end # rubocop:todo RSpec/MultipleMemoizedHelpers - context 'GET /:community/envelopes/downloads/:id' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers + context 'GET /:community/envelopes/download' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers let(:finished_at) { nil } let(:internal_error_message) { nil } let(:started_at) { nil } - - let(:envelope_download) do - create( - :envelope_download, - envelope_community:, - finished_at:, - internal_error_message:, - started_at: - ) - end + let(:url) { nil } let(:perform_request) do - get "/envelopes/downloads/#{envelope_download.id}", - 'Authorization' => "Token #{auth_token}" + get '/envelopes/download', 'Authorization' => "Token #{auth_token}" end # rubocop:todo RSpec/MultipleMemoizedHelpers - context 'invalid token' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers + context 'with invalid token' do # rubocop:todo RSpec/MultipleMemoizedHelpers let(:auth_token) { 'invalid token' } before do @@ -179,72 +169,117 @@ end # rubocop:enable RSpec/MultipleMemoizedHelpers - # rubocop:todo RSpec/MultipleMemoizedHelpers - 
context 'all good' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers - before do - perform_request - expect_status(:ok) - end - - # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'with valid token' do # rubocop:todo RSpec/MultipleMemoizedHelpers # rubocop:todo RSpec/NestedGroups - context 'in progress' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + context 'without envelope download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups # rubocop:enable RSpec/NestedGroups - let(:started_at) { Time.current } + it 'creates new pending download' do + expect { perform_request }.to change(EnvelopeDownload, :count).by(1) + expect_status(:ok) - it 'returns `in progress`' do - expect_json('status', 'in progress') + envelope_download = EnvelopeDownload.last + expect(envelope_download.envelope_community).to eq(envelope_community) + expect(envelope_download.status).to eq('pending') + + expect_json_sizes(2) + expect_json('enqueued_at', nil) + expect_json('status', 'pending') end end - # rubocop:enable RSpec/MultipleMemoizedHelpers - # rubocop:todo RSpec/MultipleMemoizedHelpers # rubocop:todo RSpec/NestedGroups - context 'failed' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + context 'with envelope download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups # rubocop:enable RSpec/NestedGroups - let(:finished_at) { Time.current } - let(:internal_error_message) { Faker::Lorem.sentence } + let!(:envelope_download) do + create( + :envelope_download, + envelope_community:, + finished_at:, + internal_error_message:, + started_at:, + status:, + url: + ) + end - it 'returns `failed`' do - expect_json('status', 'failed') + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'in progress' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable 
RSpec/NestedGroups + let(:status) { :in_progress } + + it 'returns `in progress`' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json_sizes(2) + expect_json('started_at', envelope_download.started_at.as_json) + expect_json('status', 'in_progress') + end end - end - # rubocop:enable RSpec/MultipleMemoizedHelpers + # rubocop:enable RSpec/MultipleMemoizedHelpers - # rubocop:todo RSpec/MultipleMemoizedHelpers - # rubocop:todo RSpec/NestedGroups - context 'finished' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups - # rubocop:enable RSpec/NestedGroups - let(:finished_at) { Time.current } + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'failed' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:internal_error_message) { Faker::Lorem.sentence } + let(:status) { :finished } + let(:url) { Faker::Internet.url } + + it 'returns `failed`' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json_sizes(3) + expect_json('finished_at', envelope_download.finished_at.as_json) + expect_json('status', 'failed') + expect_json('url', url) + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers - it 'returns `finished` and URL' do - expect_json('status', 'finished') + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'finished' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:finished_at) { Time.current } + let(:status) { :finished } + let(:url) { Faker::Internet.url } + + it 'returns `finished` and URL' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json_sizes(3) + expect_json('finished_at', envelope_download.finished_at.as_json) + 
expect_json('status', 'finished') + expect_json('url', url) + end end - end - # rubocop:enable RSpec/MultipleMemoizedHelpers + # rubocop:enable RSpec/MultipleMemoizedHelpers - # rubocop:todo RSpec/MultipleMemoizedHelpers - # rubocop:todo RSpec/NestedGroups - context 'pending' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups - # rubocop:enable RSpec/NestedGroups - it 'returns `pending`' do - expect_json('status', 'pending') + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'pending' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + let(:status) { :pending } + + # rubocop:enable RSpec/NestedGroups + it 'returns `pending`' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json('status', 'pending') + end end + # rubocop:enable RSpec/MultipleMemoizedHelpers end - # rubocop:enable RSpec/MultipleMemoizedHelpers end - # rubocop:enable RSpec/MultipleMemoizedHelpers end # rubocop:enable RSpec/MultipleMemoizedHelpers - context 'POST /:community/envelopes/downloads' do # rubocop:todo RSpec/ContextWording + context 'POST /:community/envelopes/download' do # rubocop:todo RSpec/ContextWording let(:perform_request) do - post '/envelopes/downloads', - nil, - 'Authorization' => "Token #{auth_token}" + post '/envelopes/download', nil, 'Authorization' => "Token #{auth_token}" end - context 'invalid token' do # rubocop:todo RSpec/ContextWording + context 'with invalid token' do let(:auth_token) { 'invalid token' } before do @@ -256,26 +291,56 @@ end end - context 'all good' do # rubocop:todo RSpec/ContextWording - # rubocop:todo RSpec/MultipleExpectations - it 'starts download' do # rubocop:todo RSpec/ExampleLength, RSpec/MultipleExpectations - # rubocop:enable RSpec/MultipleExpectations - expect do - perform_request - end.to change(EnvelopeDownload, :count).by(1) + context 'with valid token' do + let(:now) { 
Time.current.change(usec: 0) } - envelope_download = EnvelopeDownload.last - expect(envelope_download.envelope_community.name).to eq('ce_registry') + context 'without envelope download' do # rubocop:todo RSpec/NestedGroups + # rubocop:todo RSpec/MultipleExpectations + it 'creates new pending download and enqueues job' do # rubocop:todo RSpec/ExampleLength + # rubocop:enable RSpec/MultipleExpectations + travel_to now do + expect { perform_request }.to change(EnvelopeDownload, :count).by(1) + end - expect_status(:created) - expect_json('id', envelope_download.id) + expect_status(:created) - expect(ActiveJob::Base.queue_adapter.enqueued_jobs.size).to eq(1) + envelope_download = EnvelopeDownload.last + expect(envelope_download.envelope_community).to eq(envelope_community) + expect(envelope_download.status).to eq('pending') - enqueued_job = ActiveJob::Base.queue_adapter.enqueued_jobs.first - expect(enqueued_job[:args]).to eq([envelope_download.id]) - expect(enqueued_job[:job]).to eq(DownloadEnvelopesJob) + expect_json_sizes(2) + expect_json('enqueued_at', now.as_json) + expect_json('status', 'pending') + + expect(ActiveJob::Base.queue_adapter.enqueued_jobs.size).to eq(1) + + job = ActiveJob::Base.queue_adapter.enqueued_jobs.first + expect(job.fetch('arguments')).to eq([envelope_download.id]) + expect(job.fetch('job_class')).to eq('DownloadEnvelopesJob') + end end + + # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'with envelope download' do # rubocop:todo RSpec/NestedGroups + let!(:envelope_download) do + create(:envelope_download, :finished, envelope_community:) + end + + it 'enqueues job for existing download' do + travel_to now do + expect { perform_request }.to not_change(EnvelopeDownload, :count) + .and enqueue_job(DownloadEnvelopesJob).with(envelope_download.id) + end + + expect_status(:created) + expect(envelope_download.reload.status).to eq('pending') + + expect_json_sizes(2) + expect_json('enqueued_at', now.as_json) + expect_json('status', 'pending') + 
end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers end end end diff --git a/spec/factories/envelope_downloads.rb b/spec/factories/envelope_downloads.rb index dc788927..caf0a418 100644 --- a/spec/factories/envelope_downloads.rb +++ b/spec/factories/envelope_downloads.rb @@ -1,7 +1,26 @@ FactoryBot.define do factory :envelope_download do + enqueued_at { Time.current.change(usec: 0) } # rubocop:todo FactoryBot/FactoryAssociationWithStrategy envelope_community { create(:envelope_community, :with_random_name) } # rubocop:enable FactoryBot/FactoryAssociationWithStrategy + + trait :failed do + finished_at { Time.current.change(usec: 0) } + internal_error_message { Faker::Lorem.sentence } + started_at { Time.current.change(usec: 0) } + status { :finished } + end + + trait :finished do + finished_at { Time.current.change(usec: 0) } + started_at { Time.current.change(usec: 0) } + status { :finished } + end + + trait :in_progress do + started_at { Time.current.change(usec: 0) } + status { :in_progress } + end end end diff --git a/spec/jobs/download_envelopes_job_spec.rb b/spec/jobs/download_envelopes_job_spec.rb index d2a20f8f..d5c2250c 100644 --- a/spec/jobs/download_envelopes_job_spec.rb +++ b/spec/jobs/download_envelopes_job_spec.rb @@ -1,198 +1,29 @@ require 'spec_helper' -RSpec.describe DownloadEnvelopesJob do # rubocop:todo RSpec/MultipleMemoizedHelpers - let(:bucket) { double('bucket') } # rubocop:todo RSpec/VerifiedDoubles - let(:bucket_name) { 'envelope-downloads-bucket-test' } - let(:envelope_download) { create(:envelope_download, envelope_community:) } - let(:hex) { Faker::Lorem.characters } - let(:key) { "ce_registry_#{now.to_i}_#{hex}.zip" } - let(:now) { Time.current.change(usec: 0) } - let(:object) { double('object') } # rubocop:todo RSpec/VerifiedDoubles - let(:region) { 'aws-region-test' } - let(:resource) { double('resource') } # rubocop:todo RSpec/VerifiedDoubles - let(:url) { Faker::Internet.url } - - let(:envelope_community) do - 
EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') - end - - let(:perform) do - travel_to now do - described_class.new.perform(envelope_download.id) - end - end - - # rubocop:todo RSpec/MultipleMemoizedHelpers - context 'no download' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers - it 'does nothing' do - expect(described_class.new.perform(Faker::Lorem.word)).to be_nil - end - end - # rubocop:enable RSpec/MultipleMemoizedHelpers - - context 'with download' do # rubocop:todo RSpec/MultipleMemoizedHelpers - let!(:envelope1) do # rubocop:todo RSpec/IndexedLet - create(:envelope, :from_cer) - end - - let!(:envelope2) do # rubocop:todo RSpec/IndexedLet - create(:envelope, :from_cer, :with_cer_credential) - end - - before do - allow(ENV).to receive(:fetch).with('AWS_REGION').and_return(region) - - allow(ENV).to receive(:fetch) - .with('ENVELOPE_DOWNLOADS_BUCKET') - .and_return(bucket_name) - - allow(Aws::S3::Resource).to receive(:new) - .with(region:) - .and_return(resource) - - allow(SecureRandom).to receive(:hex).and_return(hex) - - allow(resource).to receive(:bucket).with(bucket_name).and_return(bucket) - allow(bucket).to receive(:object).with(key).and_return(object) - end - - # rubocop:todo RSpec/MultipleMemoizedHelpers - context 'no error' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers - before do - # rubocop:todo RSpec/MessageSpies - expect(object).to receive(:upload_file) do |path| # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies - # rubocop:enable RSpec/MessageSpies - entries = {} - - Zip::InputStream.open(path) do |stream| - loop do - entry = stream.get_next_entry - break unless entry - - entries[entry.name] = JSON(stream.read) - end - end - - entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") - entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") - - expect(entry1.fetch('envelope_ceterms_ctid')).to eq( # rubocop:todo RSpec/ExpectInHook - envelope1.envelope_ceterms_ctid - ) - 
expect(entry1.fetch('decoded_resource')).to eq( # rubocop:todo RSpec/ExpectInHook - envelope1.processed_resource - ) - # rubocop:todo RSpec/ExpectInHook - expect(entry1.fetch('updated_at').to_time).to be_within(1.second).of( - # rubocop:enable RSpec/ExpectInHook - envelope1.updated_at - ) - - expect(entry2.fetch('envelope_ceterms_ctid')).to eq( # rubocop:todo RSpec/ExpectInHook - envelope2.envelope_ceterms_ctid - ) - expect(entry2.fetch('decoded_resource')).to eq( # rubocop:todo RSpec/ExpectInHook - envelope2.processed_resource - ) - # rubocop:todo RSpec/ExpectInHook - expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( - # rubocop:enable RSpec/ExpectInHook - envelope2.updated_at - ) - end - - # rubocop:todo RSpec/StubbedMock - # rubocop:todo RSpec/MessageSpies - expect(object).to receive(:public_url).and_return(url) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies, RSpec/StubbedMock - # rubocop:enable RSpec/MessageSpies - # rubocop:enable RSpec/StubbedMock - end - - it 'creates and uploads ZIP archive to S3' do - expect do - perform - envelope_download.reload - end.to change(envelope_download, :finished_at).to(now) - .and change(envelope_download, :url).to(url) - # rubocop:todo Layout/LineLength - .and not_change { - # rubocop:enable Layout/LineLength - # rubocop:todo Layout/LineLength - envelope_download.internal_error_message - # rubocop:enable Layout/LineLength - } +RSpec.describe DownloadEnvelopesJob do + let(:envelope_download) { create(:envelope_download) } + + describe '#perform' do + context 'without error' do + it 'calls DownloadEnvelopes' do + allow(DownloadEnvelopes).to receive(:call).with(envelope_download:) + described_class.new.perform(envelope_download.id) end end - # rubocop:enable RSpec/MultipleMemoizedHelpers - context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers - let(:error) { StandardError.new(error_message) } - let(:error_message) { Faker::Lorem.sentence } + context 'with error' do + let(:error) { 
StandardError.new } - before do - # rubocop:todo RSpec/MessageSpies - expect(Airbrake).to receive(:notify).with(error, # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies - # rubocop:enable RSpec/MessageSpies - envelope_download_id: envelope_download.id) - end - - # rubocop:todo RSpec/NestedGroups - context 'when EnvelopeDownload.find_by fails' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups - # rubocop:enable RSpec/NestedGroups - before do - # rubocop:todo RSpec/StubbedMock - # rubocop:todo RSpec/MessageSpies - # rubocop:todo Layout/LineLength - expect(EnvelopeDownload).to receive(:find_by).with(id: envelope_download.id).and_raise(error) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies, RSpec/StubbedMock - # rubocop:enable Layout/LineLength - # rubocop:enable RSpec/MessageSpies - # rubocop:enable RSpec/StubbedMock - end - - it 'notifies Airbrake' do # rubocop:todo RSpec/ExampleLength - expect do - perform - envelope_download.reload - end.to not_change(envelope_download, - :finished_at).and not_change(envelope_download, - :internal_error_backtrace) - .and not_change(envelope_download, - :internal_error_message) - .and not_change { - envelope_download.url - } - end - end + it 'logs error' do + allow(Airbrake).to receive(:notify) + .with(error, envelope_download_id: envelope_download.id) - # rubocop:todo RSpec/MultipleMemoizedHelpers - context 'when Aws::S3::Object#upload_file fails' do # rubocop:todo RSpec/NestedGroups - before do - # rubocop:todo RSpec/StubbedMock - # rubocop:todo RSpec/MessageSpies - expect(object).to receive(:upload_file).and_raise(error) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies, RSpec/StubbedMock - # rubocop:enable RSpec/MessageSpies - # rubocop:enable RSpec/StubbedMock - end + allow(DownloadEnvelopes).to receive(:call) + .with(envelope_download:) + .and_raise(error) - it 'notifies Airbrake and persists error' do - expect do - perform - envelope_download.reload - end.to change(envelope_download, 
:finished_at).to(now) - .and change(envelope_download, - # rubocop:todo Layout/LineLength - :internal_error_message).to(error_message) - # rubocop:enable Layout/LineLength - # rubocop:todo Layout/LineLength - .and not_change { - # rubocop:enable Layout/LineLength - # rubocop:todo Layout/LineLength - envelope_download.url - # rubocop:enable Layout/LineLength - } - end + described_class.new.perform(envelope_download.id) + # Spy assertion: verifies the failure was actually reported, not just swallowed by the rescue. + expect(Airbrake).to have_received(:notify) + .with(error, envelope_download_id: envelope_download.id) end - # rubocop:enable RSpec/MultipleMemoizedHelpers end end end diff --git a/spec/models/envelope_spec.rb b/spec/models/envelope_spec.rb index ec903bac..aa88b8bb 100644 --- a/spec/models/envelope_spec.rb +++ b/spec/models/envelope_spec.rb @@ -337,7 +337,9 @@ def resource(ctid) end it 'generates ctids' do - expect(described_class.generate_ctid).to match(/urn:ctid:.*/) + # rubocop:todo Layout/LineLength + expect(described_class.generate_ctid).to match(/\Ace-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/) + # rubocop:enable Layout/LineLength end it 'validates uniqueness for ctid' do # rubocop:todo RSpec/MultipleExpectations diff --git a/spec/services/download_envelopes_spec.rb b/spec/services/download_envelopes_spec.rb new file mode 100644 index 00000000..5ec381d6 --- /dev/null +++ b/spec/services/download_envelopes_spec.rb @@ -0,0 +1,229 @@ +RSpec.describe DownloadEnvelopes do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:bucket) { double('bucket') } # rubocop:todo RSpec/VerifiedDoubles + let(:bucket_name) { 'envelope-downloads-bucket-test' } + let(:envelope_download) { create(:envelope_download, envelope_community:) } + let(:entries) { {} } + let(:hex) { Faker::Lorem.characters.first(32) } + let(:key) { "ce_registry_#{now.to_i}_#{hex}.zip" } + let(:now) { Time.current.change(usec: 0) } + let(:region) { 'aws-region-test' } + let(:resource) { double('resource') } # rubocop:todo RSpec/VerifiedDoubles + let(:s3_object) { double('s3_object') } # rubocop:todo RSpec/VerifiedDoubles + let(:url) { Faker::Internet.url } + + 
let(:download_envelopes) do + travel_to now do + described_class.call(envelope_download:) + end + end + + let(:envelope_community) do + EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') + end + + let!(:envelope1) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer) + end + + let!(:envelope2) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer) + end + + let!(:envelope3) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer) + end + + before do + allow(ENV).to receive(:fetch).with('AWS_REGION').and_return(region) + + allow(ENV).to receive(:fetch) + .with('ENVELOPE_DOWNLOADS_BUCKET') + .and_return(bucket_name) + + allow(Aws::S3::Resource).to receive(:new) + .with(region:) + .and_return(resource) + + allow(SecureRandom).to receive(:hex).and_return(hex) + + allow(resource).to receive(:bucket).with(bucket_name).and_return(bucket) + allow(bucket).to receive(:object).with(key).and_return(s3_object) + end + + describe '.call' do # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'without error' do # rubocop:todo RSpec/MultipleMemoizedHelpers + before do + allow(s3_object).to receive(:upload_file) do |path| + Zip::InputStream.open(path) do |stream| + loop do + entry = stream.get_next_entry + break unless entry + + entries[entry.name] = JSON(stream.read) + end + end + end + + allow(s3_object).to receive(:public_url).and_return(url) + end + + # rubocop:todo RSpec/NestedGroups + context 'without previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + # rubocop:todo RSpec/MultipleExpectations + it 'creates a new download' do # rubocop:todo RSpec/ExampleLength + # rubocop:enable RSpec/MultipleExpectations + download_envelopes + expect(entries.size).to eq(3) + + entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") + entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") + entry3 = entries.fetch("#{envelope3.envelope_ceterms_ctid}.json") + 
+ expect(entry1.fetch('envelope_ceterms_ctid')).to eq( + envelope1.envelope_ceterms_ctid + ) + expect(entry1.fetch('decoded_resource')).to eq( + envelope1.processed_resource + ) + expect(entry1.fetch('updated_at').to_time).to be_within(1.second).of( + envelope1.updated_at + ) + + expect(entry2.fetch('envelope_ceterms_ctid')).to eq( + envelope2.envelope_ceterms_ctid + ) + expect(entry2.fetch('decoded_resource')).to eq( + envelope2.processed_resource + ) + expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( + envelope2.updated_at + ) + + expect(entry3.fetch('envelope_ceterms_ctid')).to eq( + envelope3.envelope_ceterms_ctid + ) + expect(entry3.fetch('decoded_resource')).to eq( + envelope3.processed_resource + ) + expect(entry3.fetch('updated_at').to_time).to be_within(1.second).of( + envelope3.updated_at + ) + + expect(envelope_download.internal_error_message).to be_nil + expect(envelope_download.status).to eq('finished') + expect(envelope_download.url).to eq(url) + end + end + + # rubocop:todo RSpec/NestedGroups + context 'with previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:dump) do + buffer = StringIO.new + + Zip::OutputStream.write_buffer(buffer) do |stream| + [envelope1, envelope2, envelope3].each do |envelope| + stream.put_next_entry("#{envelope.envelope_ceterms_ctid}.json") + stream.puts('{}') + end + end + + buffer.string + end + + let(:envelope_download) do + create( + :envelope_download, + envelope_community:, + started_at: now + 1.second, + url: Faker::Internet.url + ) + end + + let!(:envelope4) do + create(:envelope, :from_cer, updated_at: envelope_download.started_at) + end + + before do + PaperTrail.enabled = true + + envelope2.update_column(:updated_at, envelope_download.started_at) + + travel_to envelope_download.started_at do + envelope3.destroy + end + + stub_request(:get, envelope_download.url).to_return(body: dump) + end + + after do + 
PaperTrail.enabled = false + end + + # rubocop:todo RSpec/MultipleExpectations + it 'updates the existing download' do # rubocop:todo RSpec/ExampleLength, RSpec/MultipleExpectations + # rubocop:enable RSpec/MultipleExpectations + download_envelopes + expect(entries.size).to eq(3) + + entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") + entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") + entry3 = entries.fetch("#{envelope4.envelope_ceterms_ctid}.json") + + expect(entry1).to eq({}) + + expect(entry2.fetch('envelope_ceterms_ctid')).to eq( + envelope2.envelope_ceterms_ctid + ) + expect(entry2.fetch('decoded_resource')).to eq( + envelope2.processed_resource + ) + expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( + envelope2.updated_at + ) + + expect(entry3.fetch('envelope_ceterms_ctid')).to eq( + envelope4.envelope_ceterms_ctid + ) + expect(entry3.fetch('decoded_resource')).to eq( + envelope4.processed_resource + ) + expect(entry3.fetch('updated_at').to_time).to be_within(1.second).of( + envelope4.updated_at + ) + + expect(envelope_download.internal_error_message).to be_nil + expect(envelope_download.status).to eq('finished') + expect(envelope_download.url).to eq(url) + end + end + end + + context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:error) { StandardError.new(error_message) } + let(:error_message) { Faker::Lorem.sentence } + + it 'notifies Airbrake and persists error' do # rubocop:todo RSpec/ExampleLength + allow(Airbrake).to receive(:notify).with(error) + allow(s3_object).to receive(:upload_file).and_raise(error) + + expect do + download_envelopes + envelope_download.reload + end.to change(envelope_download, :finished_at).to(now) + .and change(envelope_download, + # rubocop:todo Layout/LineLength + :internal_error_message).to(error_message) + # rubocop:enable Layout/LineLength + # rubocop:todo Layout/LineLength + .and not_change { + # rubocop:enable Layout/LineLength + # rubocop:todo 
Layout/LineLength + envelope_download.url + # rubocop:enable Layout/LineLength + } + end + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 677ff33c..fcd2c364 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -107,3 +107,4 @@ end RSpec::Matchers.define_negated_matcher :not_change, :change +RSpec::Matchers.define_negated_matcher :not_enqueue_job, :enqueue_job