Skip to content

Commit

Permalink
Merge pull request #1523 from ODNZSL/task/nzsl-159-update-nzsl-dictio…
Browse files Browse the repository at this point in the history
…nary-from-s3

NZSL-159: Update dictionary from S3 rather than a github release
  • Loading branch information
joshmcarthur authored Jan 15, 2024
2 parents 596a279 + 4e87a2b commit acfcdb1
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 30 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,19 @@ jobs:
env:
DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test
DEVISE_SECRET_KEY: anything
AWS_REGION: ap-southeast-2
RAILS_ENV: test
run: |
cp env-example .env
bundle exec rails db:prepare
- name: Run rspec
env:
DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test
DEVISE_SECRET_KEY: anything
NZSL_ONLINE_SECRET_KEY_BASE: anything
APP_DOMAIN_NAME: localhost:3000
APP_PROTOCOL: http
AWS_REGION: ap-southeast-2
S3_BUCKET_URL: http://s3-ap-southeast-2.amazonaws.com/dummy-fake/
run: bundle exec rspec spec
run: |
cp env-example .env
bundle exec rspec spec
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ gem 'pg', '~>1.2'
# Use SQLite to access signs from a Signbank dictionary export
gem 'sqlite3'

gem 'aws-sdk-s3'
gem 'bootsnap', '>= 1.1.0', require: false
gem 'haml'
gem 'jquery-rails'
Expand Down
18 changes: 18 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,22 @@ GEM
ast (2.4.2)
autoprefixer-rails (10.3.3.0)
execjs (~> 2)
aws-eventstream (1.3.0)
aws-partitions (1.878.0)
aws-sdk-core (3.190.2)
aws-eventstream (~> 1, >= 1.3.0)
aws-partitions (~> 1, >= 1.651.0)
aws-sigv4 (~> 1.8)
jmespath (~> 1, >= 1.6.1)
aws-sdk-kms (1.76.0)
aws-sdk-core (~> 3, >= 3.188.0)
aws-sigv4 (~> 1.1)
aws-sdk-s3 (1.142.0)
aws-sdk-core (~> 3, >= 3.189.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.8)
aws-sigv4 (1.8.0)
aws-eventstream (~> 1, >= 1.0.2)
babel-source (5.8.35)
babel-transpiler (0.7.0)
babel-source (>= 4.0, < 6)
Expand Down Expand Up @@ -175,6 +191,7 @@ GEM
multi_xml (>= 0.5.2)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
jmespath (1.6.2)
jquery-rails (4.4.0)
rails-dom-testing (>= 1, < 3)
railties (>= 4.2.0)
Expand Down Expand Up @@ -420,6 +437,7 @@ PLATFORMS

DEPENDENCIES
autoprefixer-rails
aws-sdk-s3
bootsnap (>= 1.1.0)
brakeman
bundle-audit
Expand Down
13 changes: 5 additions & 8 deletions config/initializers/sign_database.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# Refresh the packaged NZSL dictionary database every time the application is
# prepared, by running the `dictionary:update` Rake task.
# The update is registered through the reloader's `to_prepare` hook rather than
# run inline while initializers are still being evaluated.
Rails.application.reloader.to_prepare do
  # `load_tasks` re-reads the .rake files on each call, and Rake appends a
  # duplicate action to any task that is already defined. Only load the tasks
  # when nothing else (a Rakefile, or an earlier prepare) has done so already,
  # so that `dictionary:update` never runs more than once per `execute`.
  tasks_loaded = defined?(Rake::Task) && Rake::Task.task_defined?('dictionary:update')
  Rails.application.load_tasks unless tasks_loaded

  # `execute` (unlike `invoke`) runs the task body every time it is called.
  Rake::Task['dictionary:update'].execute
end
5 changes: 4 additions & 1 deletion env-example
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
S3_BUCKET_URL: "example.s3.url/"
NZSL_ONLINE_SECRET_KEY_BASE: 62da7bed624d0cbbe3d186166fdd88db5bb3989075a2154cebe3e5ee20a4f2a2d540865309958346b7b43799d461be2b37c6e27d1fd6ca03b1f59622c5ccc402
APP_DOMAIN_NAME: "localhost:3000"
APP_PROTOCOL: "http"
APP_PROTOCOL: "http"

# S3 URI (s3://bucket/key) of the latest public dictionary database release
DICTIONARY_DATABASE_S3_LOCATION="s3://nzsl-signbank-media-production/dictionary-exports/nzsl.db"
54 changes: 36 additions & 18 deletions lib/tasks/dictionary.rake
Original file line number Diff line number Diff line change
@@ -1,29 +1,47 @@
namespace :dictionary do
namespace :dictionary do # rubocop:disable Metrics/BlockLength
desc 'Updates the NZSL dictionary packaged with the application to the latest release from Signbank'
task :update do # rubocop:disable Rails/RakeEnvironment - we need to place this file before the app can start
  # Downloads the dictionary database named by DICTIONARY_DATABASE_S3_LOCATION
  # to a temporary path, verifies it, and only then moves it over
  # db/dictionary.sqlite3, so a failed download or a corrupt file never
  # replaces the database currently in use.
  #
  # Raises RuntimeError when the location is missing or not an s3:// URI, or
  # when the downloaded file fails SQLite's integrity check.

  # Use the default-argument form of `fetch`: `ENV.fetch(key) || ''` raises
  # KeyError before the `||` is ever reached, which hides the clearer message below.
  database_s3_location = URI.parse(ENV.fetch('DICTIONARY_DATABASE_S3_LOCATION', ''))
  raise 'DICTIONARY_DATABASE_S3_LOCATION must be an S3 URI' unless database_s3_location.scheme == 's3'

  download_s3_uri(database_s3_location, 'db/new-dictionary.sqlite3')

  database = SQLite3::Database.open('db/new-dictionary.sqlite3')
  begin
    raise 'Database does not pass integrity check' unless database.integrity_check == [['ok']]

    # The export's version is read from the SQLite `user_version` pragma.
    version = database.get_int_pragma('user_version')
  ensure
    # Release the file handle before the file is moved into place.
    database.close
  end

  FileUtils.mv('db/new-dictionary.sqlite3', 'db/dictionary.sqlite3')

  puts "Updated db/dictionary.sqlite3 to #{version}"
end

# Returns the memoised S3 client used to download the dictionary database.
#
# Region and credentials are read from the DICTIONARY_AWS_* environment
# variables; the region falls back to AWS_REGION. Options whose variable is
# unset are left out of the hash handed to the client entirely (presumably so
# the SDK's own default configuration applies - confirm against the SDK docs).
def s3_client
  return @s3_client if @s3_client

  region = ENV.fetch('DICTIONARY_AWS_REGION') { ENV.fetch('AWS_REGION', nil) }
  settings = {
    region: region,
    access_key_id: ENV.fetch('DICTIONARY_AWS_ACCESS_KEY_ID', nil),
    secret_access_key: ENV.fetch('DICTIONARY_AWS_SECRET_ACCESS_KEY', nil)
  }
  configured = settings.reject { |_name, value| value.nil? }

  @s3_client = Aws::S3::Client.new(configured)
end

# Downloads the object identified by an s3:// URI to a local file.
#
# The object is first requested through the configured `s3_client`. If that
# fails because credentials are missing, or because S3 returns a service error,
# the object's public URL is fetched over plain HTTP(S) instead.
#
# @param s3_uri [URI::Generic] parsed URI whose host is the bucket and whose path is the object key
# @param target [String] local path the object is written to
# @raise [ArgumentError] when the URI has no bucket or no object key
# @raise [Net::HTTPExceptions] when the public-URL fallback gets a non-2xx response
def download_s3_uri(s3_uri, target)
  bucket = s3_uri.host
  key = s3_uri.path.to_s.delete_prefix('/')

  # Fail with a clear message before any request is attempted; `s3://bucket`
  # and `s3://bucket/` have no object key to download.
  if bucket.to_s.empty? || key.empty?
    raise ArgumentError, "Expected an S3 URI of the form s3://bucket/key, got #{s3_uri}"
  end

  begin
    s3_client.get_object({ bucket:, key: }, target:)
  rescue Aws::Errors::MissingCredentialsError,
         Aws::Sigv4::Errors::MissingCredentialsError,
         Aws::S3::Errors::ServiceError
    # Fallback to public-URL download over HTTP if credentials are not provided or invalid.
    # TODO: use aws-sdk to leverage aws-client optimizations once unsigned requests are supported:
    # https://github.com/aws/aws-sdk-ruby/issues/1149
    download_public_s3_object(bucket, key, target)
  end
end

# Fetches an object through its public URL and streams it to `target`.
def download_public_s3_object(bucket, key, target)
  # NOTE(review): `credentials: 0` looks like a placeholder so the SDK does not
  # try to resolve real credentials just to build a URL - confirm.
  public_url = URI.parse(Aws::S3::Bucket.new(bucket, credentials: 0).object(key).public_url)

  # Only negotiate TLS when the URL actually is https.
  Net::HTTP.start(public_url.host, public_url.port, use_ssl: public_url.scheme == 'https') do |http|
    http.request_get(public_url.request_uri) do |response|
      response.value # raises unless the response is 2xx, before anything is written

      # Write the body chunk by chunk rather than holding the whole database in memory.
      File.open(target, 'wb') do |file|
        response.read_body { |chunk| file.write(chunk) }
      end
    end
  end
end
end

0 comments on commit acfcdb1

Please sign in to comment.