From f7ae9bbde1084e875cb1e7fe23d7d9b089b349e1 Mon Sep 17 00:00:00 2001 From: Kshitiz Shakya Date: Wed, 14 May 2025 17:14:46 +0545 Subject: [PATCH] Using larger github runner + Move models.json to s3 bucket [skip-test] --- .github/workflows/create_search_index.yml | 7 ++++++- docs/Gemfile | 3 +++ docs/Gemfile.lock | 22 ++++++++++++++++++++++ docs/_plugins/search_index.rb | 23 +++++++++++++++++++++-- docs/latest.html | 3 --- 5 files changed, 52 insertions(+), 6 deletions(-) delete mode 100644 docs/latest.html diff --git a/.github/workflows/create_search_index.yml b/.github/workflows/create_search_index.yml index fece33dc41e8f0..5d4f6db3d2f871 100644 --- a/.github/workflows/create_search_index.yml +++ b/.github/workflows/create_search_index.yml @@ -11,7 +11,8 @@ concurrency: jobs: jekyll: - runs-on: ubuntu-latest + runs-on: RAM32GB + timeout-minutes: 600 environment: jekyll steps: - uses: actions/checkout@v2 @@ -49,6 +50,8 @@ jobs: ELASTICSEARCH_INDEX_NAME: ${{ secrets.ELASTICSEARCH_INDEX_NAME }} SEARCH_ORIGIN: ${{ secrets.SEARCH_ORIGIN }} ORIGIN: ${{ secrets.ORIGIN }} + AWS_ACCESS_KEY_ID: ${{ secrets.MODELS_PUBLIC_KEY }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.MODELS_SECRET_KEY }} working-directory: docs run: | bundle exec jekyll build --incremental @@ -62,6 +65,8 @@ jobs: ELASTICSEARCH_INDEX_NAME: ${{ secrets.ELASTICSEARCH_INDEX_NAME }} SEARCH_ORIGIN: ${{ secrets.SEARCH_ORIGIN }} ORIGIN: ${{ secrets.ORIGIN }} + AWS_ACCESS_KEY_ID: ${{ secrets.MODELS_PUBLIC_KEY }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.MODELS_SECRET_KEY }} working-directory: docs run: | rm -f .jekyll-metadata diff --git a/docs/Gemfile b/docs/Gemfile index 41e93b1c165a29..8a4e98f169603c 100644 --- a/docs/Gemfile +++ b/docs/Gemfile @@ -11,6 +11,9 @@ gem "webrick" gem "jekyll", "~> 3.9" +gem "aws-sdk-s3", "~>1" + + group "jekyll-plugins" do gem "jekyll-incremental", "0.1.0", path: "_plugins/jekyll-incremental" end diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 880e734f5d046b..02bbee56ff83ba 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -15,6 +15,25 @@ GEM zeitwerk (~> 2.2, >= 2.2.2) addressable (2.8.1) public_suffix (>= 2.0.2, < 6.0) + aws-eventstream (1.4.0) + aws-partitions (1.1126.0) + aws-sdk-core (3.226.2) + aws-eventstream (~> 1, >= 1.3.0) + aws-partitions (~> 1, >= 1.992.0) + aws-sigv4 (~> 1.9) + base64 + jmespath (~> 1, >= 1.6.1) + logger + aws-sdk-kms (1.106.0) + aws-sdk-core (~> 3, >= 3.225.0) + aws-sigv4 (~> 1.5) + aws-sdk-s3 (1.192.0) + aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.5) + aws-sigv4 (1.12.1) + aws-eventstream (~> 1, >= 1.0.2) + base64 (0.3.0) coffee-script (2.4.1) coffee-script-source execjs @@ -233,6 +252,7 @@ GEM gemoji (~> 3.0) html-pipeline (~> 2.2) jekyll (>= 3.0, < 5.0) + jmespath (1.6.2) kramdown (2.3.2) rexml kramdown-parser-gfm (1.1.0) @@ -241,6 +261,7 @@ GEM listen (3.8.0) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) + logger (1.7.0) mercenary (0.3.6) mini_portile2 (2.8.1) minima (2.5.1) @@ -301,6 +322,7 @@ PLATFORMS x86_64-linux DEPENDENCIES + aws-sdk-s3 (~> 1) elasticsearch (~> 7.10) github-pages (= 227) jekyll (~> 3.9) diff --git a/docs/_plugins/search_index.rb b/docs/_plugins/search_index.rb index 4fc1b38bd71bd8..f8f5139cdabd11 100644 --- a/docs/_plugins/search_index.rb +++ b/docs/_plugins/search_index.rb @@ -5,7 +5,9 @@ require 'date' require 'elasticsearch' require 'nokogiri' +require 'aws-sdk-s3' +BUCKET_NAME="pypi.johnsnowlabs.com" SEARCH_URL = (ENV["SEARCH_ORIGIN"] || 'https://search.modelshub.johnsnowlabs.com') + '/' ELASTICSEARCH_INDEX_NAME = ENV["ELASTICSEARCH_INDEX_NAME"] || 'models' @@ -17,6 +19,18 @@ $remote_editions = Set.new +def upload_file_to_s3_bucket(file_path) + s3 = Aws::S3::Client.new(region: 'eu-west-1') + object_key = "public/models.json" + begin + s3.put_object(bucket: BUCKET_NAME, key: object_key, body: File.open(file_path, 'rb'), acl: 'public-read') + puts "File uploaded successfully to #{BUCKET_NAME}/#{object_key}" + + rescue Aws::S3::Errors::ServiceError => e + puts "Failed to upload file: #{e.message}" + end +end + class Version < Array def initialize name m = /(\d+\.\d+)\z/.match(name) @@ -252,7 +266,7 @@ def initialize(client) def index(id, data) @buffer << { update: { _id: id, data: {doc: data, doc_as_upsert: true}} } - self.execute if @buffer.length >= 100 + self.execute if @buffer.length >= 500 end def execute @@ -578,9 +592,14 @@ def is_latest?(group, model) models_references_json = backup_references_data.merge(models_references_json) end - filename = File.join(site.config['destination'], 'models.json') + filename = File.join(site.config['destination'], 'backup-modelss3.json') + File.write(filename, models_json.values.to_json) File.write(backup_filename, models_json.to_json) + # models.json moved to pypi s3 bucket + upload_file_to_s3_bucket(filename) + + File.delete(filename) benchmarking_filename = File.join(site.config['destination'], 'benchmarking.json') File.write(benchmarking_filename, models_benchmarking_json.to_json) diff --git a/docs/latest.html b/docs/latest.html deleted file mode 100644 index 89dc810eadb76b..00000000000000 --- a/docs/latest.html +++ /dev/null @@ -1,3 +0,0 @@ ---- -layout: archive ----