diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index abd1453fa..a763d1ac2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -94,8 +94,10 @@ jobs:
         env:
           DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test
           DEVISE_SECRET_KEY: anything
+          AWS_REGION: ap-southeast-2
           RAILS_ENV: test
         run: |
+          cp env-example .env
           bundle exec rails db:prepare
 
       - name: Run rspec
@@ -103,7 +105,8 @@ jobs:
           DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test
           DEVISE_SECRET_KEY: anything
           NZSL_ONLINE_SECRET_KEY_BASE: anything
-          APP_DOMAIN_NAME: localhost:3000
-          APP_PROTOCOL: http
+          AWS_REGION: ap-southeast-2
           S3_BUCKET_URL: http://s3-ap-southeast-2.amazonaws.com/dummy-fake/
-        run: bundle exec rspec spec
+        run: |
+          cp env-example .env
+          bundle exec rspec spec
diff --git a/Gemfile b/Gemfile
index 9237e5c8c..d30da7236 100644
--- a/Gemfile
+++ b/Gemfile
@@ -11,6 +11,7 @@ gem 'pg', '~>1.2'
 # Use SQLite to access signs from a Signbank dictionary export
 gem 'sqlite3'
 
+gem 'aws-sdk-s3'
 gem 'bootsnap', '>= 1.1.0', require: false
 gem 'haml'
 gem 'jquery-rails'
diff --git a/Gemfile.lock b/Gemfile.lock
index ebf20cd6c..7d8530b37 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -71,6 +71,22 @@ GEM
     ast (2.4.2)
     autoprefixer-rails (10.3.3.0)
       execjs (~> 2)
+    aws-eventstream (1.3.0)
+    aws-partitions (1.878.0)
+    aws-sdk-core (3.190.2)
+      aws-eventstream (~> 1, >= 1.3.0)
+      aws-partitions (~> 1, >= 1.651.0)
+      aws-sigv4 (~> 1.8)
+      jmespath (~> 1, >= 1.6.1)
+    aws-sdk-kms (1.76.0)
+      aws-sdk-core (~> 3, >= 3.188.0)
+      aws-sigv4 (~> 1.1)
+    aws-sdk-s3 (1.142.0)
+      aws-sdk-core (~> 3, >= 3.189.0)
+      aws-sdk-kms (~> 1)
+      aws-sigv4 (~> 1.8)
+    aws-sigv4 (1.8.0)
+      aws-eventstream (~> 1, >= 1.0.2)
     babel-source (5.8.35)
     babel-transpiler (0.7.0)
       babel-source (>= 4.0, < 6)
@@ -175,6 +191,7 @@ GEM
       multi_xml (>= 0.5.2)
     i18n (1.14.1)
       concurrent-ruby (~> 1.0)
+    jmespath (1.6.2)
     jquery-rails (4.4.0)
       rails-dom-testing (>= 1, < 3)
       railties (>= 4.2.0)
@@ -420,6 +437,7 @@ PLATFORMS
 
 DEPENDENCIES
   autoprefixer-rails
+  aws-sdk-s3
   bootsnap (>= 1.1.0)
   brakeman
   bundle-audit
diff --git a/config/initializers/sign_database.rb b/config/initializers/sign_database.rb
index d361cde21..ff71e168c 100644
--- a/config/initializers/sign_database.rb
+++ b/config/initializers/sign_database.rb
@@ -1,8 +1,7 @@
-# Update the dictionary file if it is older than 1 month
-# We update this file in both dictionary modes because our tests
-# expect the database to test across both modes
-path = Rails.root.join('db', 'dictionary.sqlite3')
-Rails.application.load_tasks
-deployed = !Rails.env.development? && !Rails.env.test?
-
-Rake::Task['dictionary:update'].execute if deployed || (!path.exist? || path.mtime <= 1.month.ago)
+Rails.application.reloader.to_prepare do
+  # Update the dictionary file on boot. This callback can run more than once per process (to_prepare
+  # also fires before each code reload in development), so only load the Rake tasks when the task is
+  # not defined yet: loading them again would register its action twice and repeat the download.
+  Rails.application.load_tasks unless defined?(Rake::Task) && Rake::Task.task_defined?('dictionary:update')
+  Rake::Task['dictionary:update'].execute
+end
diff --git a/env-example b/env-example
index 6b1b48653..ee1bbca83 100644
--- a/env-example
+++ b/env-example
@@ -1,4 +1,7 @@
 S3_BUCKET_URL: "example.s3.url/"
 NZSL_ONLINE_SECRET_KEY_BASE: 62da7bed624d0cbbe3d186166fdd88db5bb3989075a2154cebe3e5ee20a4f2a2d540865309958346b7b43799d461be2b37c6e27d1fd6ca03b1f59622c5ccc402
 APP_DOMAIN_NAME: "localhost:3000"
-APP_PROTOCOL: "http"
\ No newline at end of file
+APP_PROTOCOL: "http"
+
+# The latest public release
+DICTIONARY_DATABASE_S3_LOCATION: "s3://nzsl-signbank-media-production/dictionary-exports/nzsl.db"
diff --git a/lib/tasks/dictionary.rake b/lib/tasks/dictionary.rake
index e54a99bd6..0e6e15956 100644
--- a/lib/tasks/dictionary.rake
+++ b/lib/tasks/dictionary.rake
@@ -1,29 +1,53 @@
-namespace :dictionary do
+namespace :dictionary do # rubocop:disable Metrics/BlockLength
   desc 'Updates the NZSL dictionary packaged with the application to the latest release from Signbank'
   task :update do # rubocop:disable Rails/RakeEnvironment - we need to place this file before the app can start
-    repo = 'odnzsl/nzsl-dictionary-scripts'
-    filename = 'nzsl.db'
-    content_type = 'application/vnd.sqlite3'
-    release_uri = URI::HTTPS.build(host: 'api.github.com', path: "/repos/#{repo}/releases/latest")
-    release = JSON.parse(release_uri.open.read)
-    database_asset = release['assets'].find do |asset|
-      asset['name'] == filename && asset['content_type'] == content_type
-    end
-
-    database_url = database_asset.fetch('browser_download_url')
+    # Default to '' so that an unset variable fails the scheme check below with a clear message
+    database_s3_location = URI.parse(ENV.fetch('DICTIONARY_DATABASE_S3_LOCATION', ''))
+    raise 'DICTIONARY_DATABASE_S3_LOCATION must be an S3 URI' unless database_s3_location.scheme == 's3'
 
-    File.open('db/new-dictionary.sqlite3', 'wb') do |f|
-      f.write URI.parse(database_url).open.read
-    rescue OpenURI::HTTPError
-      sleep 5 # Wait a few seconds before retrying
-      retry
-    end
+    download_s3_uri(database_s3_location, 'db/new-dictionary.sqlite3')
 
     database = SQLite3::Database.open('db/new-dictionary.sqlite3')
     raise 'Database does not pass integrity check' unless database.integrity_check == [['ok']]
 
+    version = database.get_int_pragma('user_version')
+    database.close # release the handle before the file is moved into place
+
     FileUtils.mv('db/new-dictionary.sqlite3', 'db/dictionary.sqlite3')
 
-    puts "Updated db/dictionary.sqlite3 to #{release['name']}"
+    puts "Updated db/dictionary.sqlite3 to #{version}"
+  end
+
+  # Memoised S3 client. Region and credentials are read from the DICTIONARY_AWS_* variables (the region
+  # falls back to AWS_REGION); unset values are removed, presumably so the SDK's own defaults apply.
+  def s3_client
+    @s3_client ||= Aws::S3::Client.new({
+      region: ENV.fetch('DICTIONARY_AWS_REGION', ENV.fetch('AWS_REGION', nil)),
+      access_key_id: ENV.fetch('DICTIONARY_AWS_ACCESS_KEY_ID', nil),
+      secret_access_key: ENV.fetch('DICTIONARY_AWS_SECRET_ACCESS_KEY', nil)
+    }.compact)
+  end
+
+  # Downloads the object addressed by an s3://bucket/key URI to the local path +target+,
+  # falling back to the object's public URL when the request through the S3 client fails.
+  def download_s3_uri(s3_uri, target)
+    bucket = s3_uri.host
+    key = s3_uri.path[1..] # drop the leading '/'
+
+    begin
+      s3_client.get_object({ bucket:, key: }, target:)
+    rescue Aws::Errors::MissingCredentialsError,
+           Aws::Sigv4::Errors::MissingCredentialsError,
+           Aws::S3::Errors::ServiceError
+
+      # Fallback to public-URL download over HTTP if credentials are not provided or invalid.
+      # TODO: use aws-sdk to leverage aws-client optimizations once unsigned requests are supported:
+      # https://github.com/aws/aws-sdk-ruby/issues/1149
+      public_url = URI.parse(Aws::S3::Bucket.new(bucket, credentials: 0).object(key).public_url)
+      Net::HTTP.start(public_url.host, public_url.port, use_ssl: public_url.scheme == 'https') do |http|
+        response = http.get(public_url.request_uri).tap(&:value) # #value raises unless the response is 2xx
+        File.binwrite(target, response.body)
+      end
+    end
   end
 end