From 6a0dddd9e2757d7fa10b3e41c61d116295d19473 Mon Sep 17 00:00:00 2001 From: Josh McArthur Date: Thu, 11 Jan 2024 10:40:25 +1300 Subject: [PATCH 1/6] Add dependency on aws-sdk-s3 to download the dictionary file from S3 --- Gemfile | 1 + Gemfile.lock | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/Gemfile b/Gemfile index 9237e5c8c..d30da7236 100644 --- a/Gemfile +++ b/Gemfile @@ -11,6 +11,7 @@ gem 'pg', '~>1.2' # Use SQLite to access signs from a Signbank dictionary export gem 'sqlite3' +gem 'aws-sdk-s3' gem 'bootsnap', '>= 1.1.0', require: false gem 'haml' gem 'jquery-rails' diff --git a/Gemfile.lock b/Gemfile.lock index ebf20cd6c..7d8530b37 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -71,6 +71,22 @@ GEM ast (2.4.2) autoprefixer-rails (10.3.3.0) execjs (~> 2) + aws-eventstream (1.3.0) + aws-partitions (1.878.0) + aws-sdk-core (3.190.2) + aws-eventstream (~> 1, >= 1.3.0) + aws-partitions (~> 1, >= 1.651.0) + aws-sigv4 (~> 1.8) + jmespath (~> 1, >= 1.6.1) + aws-sdk-kms (1.76.0) + aws-sdk-core (~> 3, >= 3.188.0) + aws-sigv4 (~> 1.1) + aws-sdk-s3 (1.142.0) + aws-sdk-core (~> 3, >= 3.189.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.8) + aws-sigv4 (1.8.0) + aws-eventstream (~> 1, >= 1.0.2) babel-source (5.8.35) babel-transpiler (0.7.0) babel-source (>= 4.0, < 6) @@ -175,6 +191,7 @@ GEM multi_xml (>= 0.5.2) i18n (1.14.1) concurrent-ruby (~> 1.0) + jmespath (1.6.2) jquery-rails (4.4.0) rails-dom-testing (>= 1, < 3) railties (>= 4.2.0) @@ -420,6 +437,7 @@ PLATFORMS DEPENDENCIES autoprefixer-rails + aws-sdk-s3 bootsnap (>= 1.1.0) brakeman bundle-audit From d203ee82450b71e86c716b905a24b7956e7469a8 Mon Sep 17 00:00:00 2001 From: Josh McArthur Date: Thu, 11 Jan 2024 10:40:36 +1300 Subject: [PATCH 2/6] Add code to update dictionary from S3 from nzsl-share --- config/initializers/sign_database.rb | 27 ++++++++++---- env-example | 5 ++- lib/tasks/dictionary.rake | 54 ++++++++++++++++++---------- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git 
a/config/initializers/sign_database.rb b/config/initializers/sign_database.rb index d361cde21..8406bc0f8 100644 --- a/config/initializers/sign_database.rb +++ b/config/initializers/sign_database.rb @@ -1,8 +1,21 @@ -# Update the dictionary file if it is older than 1 month -# We update this file in both dictionary modes because our tests -# expect the database to test across both modes -path = Rails.root.join('db', 'dictionary.sqlite3') -Rails.application.load_tasks -deployed = !Rails.env.development? && !Rails.env.test? +Rails.application.reloader.to_prepare do + # Update the dictionary file on boot + Rails.application.load_tasks + deployed = !Rails.env.development? && !Rails.env.test? -Rake::Task['dictionary:update'].execute if deployed || (!path.exist? || path.mtime <= 1.month.ago) + begin + Rake::Task["dictionary:update"].execute if deployed + rescue StandardError => e + warn e + end + + ## + # All other tables make heavy use of a 'word' column. Add an alias for it here so that + # we can use common queries and ordering. 
+ # There's no ADD COLUMN IF NOT EXISTS, so we just handle the error + begin + DictionarySign.connection.execute("ALTER TABLE words ADD COLUMN word text AS (gloss)") + rescue ActiveRecord::StatementInvalid => e + raise e unless e.message == "SQLite3::SQLException: duplicate column name: word" + end +end diff --git a/env-example b/env-example index 6b1b48653..ee1bbca83 100644 --- a/env-example +++ b/env-example @@ -1,4 +1,7 @@ S3_BUCKET_URL: "example.s3.url/" NZSL_ONLINE_SECRET_KEY_BASE: 62da7bed624d0cbbe3d186166fdd88db5bb3989075a2154cebe3e5ee20a4f2a2d540865309958346b7b43799d461be2b37c6e27d1fd6ca03b1f59622c5ccc402 APP_DOMAIN_NAME: "localhost:3000" -APP_PROTOCOL: "http" \ No newline at end of file +APP_PROTOCOL: "http" + +# The latest public release +DICTIONARY_DATABASE_S3_LOCATION="s3://nzsl-signbank-media-production/dictionary-exports/nzsl.db" diff --git a/lib/tasks/dictionary.rake b/lib/tasks/dictionary.rake index e54a99bd6..0e6e15956 100644 --- a/lib/tasks/dictionary.rake +++ b/lib/tasks/dictionary.rake @@ -1,29 +1,47 @@ -namespace :dictionary do +namespace :dictionary do # rubocop:disable Metrics/BlockLength desc 'Updates the NZSL dictionary packaged with the application to the latest release from Signbank' task :update do # rubocop:disable Rails/RakeEnvironment - we need to place this file before the app can start - repo = 'odnzsl/nzsl-dictionary-scripts' - filename = 'nzsl.db' - content_type = 'application/vnd.sqlite3' - release_uri = URI::HTTPS.build(host: 'api.github.com', path: "/repos/#{repo}/releases/latest") - release = JSON.parse(release_uri.open.read) - database_asset = release['assets'].find do |asset| - asset['name'] == filename && asset['content_type'] == content_type - end - - database_url = database_asset.fetch('browser_download_url') + database_s3_location = URI.parse(ENV.fetch('DICTIONARY_DATABASE_S3_LOCATION', '')) + raise 'DICTIONARY_DATABASE_S3_LOCATION must be an S3 URI' unless database_s3_location.scheme == 's3' - 
File.open('db/new-dictionary.sqlite3', 'wb') do |f| - f.write URI.parse(database_url).open.read - rescue OpenURI::HTTPError - sleep 5 # Wait a few seconds before retrying - retry - end + download_s3_uri(database_s3_location, 'db/new-dictionary.sqlite3') database = SQLite3::Database.open('db/new-dictionary.sqlite3') raise 'Database does not pass integrity check' unless database.integrity_check == [['ok']] + version = database.get_int_pragma('user_version') + FileUtils.mv('db/new-dictionary.sqlite3', 'db/dictionary.sqlite3') - puts "Updated db/dictionary.sqlite3 to #{release['name']}" + puts "Updated db/dictionary.sqlite3 to #{version}" + end + + def s3_client + @s3_client ||= Aws::S3::Client.new({ + region: ENV.fetch('DICTIONARY_AWS_REGION', ENV.fetch('AWS_REGION', nil)), + access_key_id: ENV.fetch('DICTIONARY_AWS_ACCESS_KEY_ID', nil), + secret_access_key: ENV.fetch('DICTIONARY_AWS_SECRET_ACCESS_KEY', nil) + }.compact) + end + + def download_s3_uri(s3_uri, target) + bucket = s3_uri.host + key = s3_uri.path[1..] + + begin + s3_client.get_object({ bucket:, key: }, target:) + rescue Aws::Errors::MissingCredentialsError, + Aws::Sigv4::Errors::MissingCredentialsError, + Aws::S3::Errors::ServiceError + + # Fallback to public-URL download over HTTP if credentials are not provided or invalid. 
+ # TODO use aws-sdk to leverage aws-client optimizations once unsigned requests are supported: + # https://github.com/aws/aws-sdk-ruby/issues/1149 + public_url = URI.parse(Aws::S3::Bucket.new(bucket, credentials: 0).object(key).public_url) + Net::HTTP.start(public_url.host, public_url.port, use_ssl: true) do |http| + response = http.get(public_url.request_uri).tap(&:value) + File.binwrite(target, response.body) + end + end end end From 8a6a1150ecd0b8fad2991661ee283d3a3bb09d9d Mon Sep 17 00:00:00 2001 From: Josh McArthur Date: Thu, 11 Jan 2024 10:44:04 +1300 Subject: [PATCH 3/6] Copy env-example during CI run to load default config --- .github/workflows/ci.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abd1453fa..5af01d89b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,6 +96,7 @@ jobs: DEVISE_SECRET_KEY: anything RAILS_ENV: test run: | + cp env-example .env bundle exec rails db:prepare - name: Run rspec @@ -103,7 +104,7 @@ jobs: DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test DEVISE_SECRET_KEY: anything NZSL_ONLINE_SECRET_KEY_BASE: anything - APP_DOMAIN_NAME: localhost:3000 - APP_PROTOCOL: http S3_BUCKET_URL: http://s3-ap-southeast-2.amazonaws.com/dummy-fake/ - run: bundle exec rspec spec + run: | + cp env-example .env + bundle exec rspec spec From 139a9c202bf62c1bbd2727e247f5428e62f32b5e Mon Sep 17 00:00:00 2001 From: Josh McArthur Date: Thu, 11 Jan 2024 11:02:40 +1300 Subject: [PATCH 4/6] Use correct model name for Dictionary --- config/initializers/sign_database.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/initializers/sign_database.rb b/config/initializers/sign_database.rb index 8406bc0f8..86e15efc5 100644 --- a/config/initializers/sign_database.rb +++ b/config/initializers/sign_database.rb @@ -4,7 +4,7 @@ deployed = !Rails.env.development? && !Rails.env.test? 
begin - Rake::Task["dictionary:update"].execute if deployed + Rake::Task['dictionary:update'].execute if deployed rescue StandardError => e warn e end @@ -14,8 +14,8 @@ # we can use common queries and ordering. # There's no ADD COLUMN IF NOT EXISTS, so we just handle the error begin - DictionarySign.connection.execute("ALTER TABLE words ADD COLUMN word text AS (gloss)") + Signbank::Sign.connection.execute('ALTER TABLE words ADD COLUMN word text AS (gloss)') rescue ActiveRecord::StatementInvalid => e - raise e unless e.message == "SQLite3::SQLException: duplicate column name: word" + raise e unless e.message == 'SQLite3::SQLException: duplicate column name: word' end end From a8d4a45d5897f074540a9d442b3bf529d4e17a18 Mon Sep 17 00:00:00 2001 From: Josh McArthur Date: Thu, 11 Jan 2024 12:13:43 +1300 Subject: [PATCH 5/6] Update the sign database each time Rails starts --- config/initializers/sign_database.rb | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/config/initializers/sign_database.rb b/config/initializers/sign_database.rb index 86e15efc5..ff71e168c 100644 --- a/config/initializers/sign_database.rb +++ b/config/initializers/sign_database.rb @@ -1,21 +1,5 @@ Rails.application.reloader.to_prepare do # Update the dictionary file on boot Rails.application.load_tasks - deployed = !Rails.env.development? && !Rails.env.test? - - begin - Rake::Task['dictionary:update'].execute if deployed - rescue StandardError => e - warn e - end - - ## - # All other tables make heavy use of a 'word' column. Add an alias for it here so that - # we can use common queries and ordering. 
- # There's no ADD COLUMN IF NOT EXISTS, so we just handle the error - begin - Signbank::Sign.connection.execute('ALTER TABLE words ADD COLUMN word text AS (gloss)') - rescue ActiveRecord::StatementInvalid => e - raise e unless e.message == 'SQLite3::SQLException: duplicate column name: word' - end + Rake::Task['dictionary:update'].execute end From 4e87a2bdd18ee46f9710ae4ed39a124377b20d19 Mon Sep 17 00:00:00 2001 From: Josh McArthur Date: Thu, 11 Jan 2024 12:43:33 +1300 Subject: [PATCH 6/6] Provide AWS_REGION for CI configuration --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5af01d89b..a763d1ac2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,6 +94,7 @@ jobs: env: DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test DEVISE_SECRET_KEY: anything + AWS_REGION: ap-southeast-2 RAILS_ENV: test run: | cp env-example .env @@ -104,6 +105,7 @@ jobs: DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test DEVISE_SECRET_KEY: anything NZSL_ONLINE_SECRET_KEY_BASE: anything + AWS_REGION: ap-southeast-2 S3_BUCKET_URL: http://s3-ap-southeast-2.amazonaws.com/dummy-fake/ run: | cp env-example .env