diff --git a/app/services/deepblue/yaml_populate_service.rb b/app/services/deepblue/yaml_populate_service.rb index 6bdc3b3e..16c12017 100644 --- a/app/services/deepblue/yaml_populate_service.rb +++ b/app/services/deepblue/yaml_populate_service.rb @@ -327,7 +327,7 @@ def yaml_populate_collection( collection:, collection = Collection.find collection if collection.is_a? String target_file = yaml_filename_collection( pathname_dir: dir, collection: collection ) target_dir = yaml_targetdir_collection( pathname_dir: dir, collection: collection ) - Dir.mkdir( target_dir ) unless Dir.exist? target_dir + Dir.mkdir( target_dir ) if export_files && !Dir.exist?( target_dir ) open( target_file, 'w' ) do |out2| yaml_populate_collection( collection: collection, out: out2, @@ -398,8 +398,6 @@ def yaml_populate_users( dir: MetadataHelper2::DEFAULT_BASE_DIR, out: nil, targe Dir.mkdir( dir ) unless Dir.exist? dir if out.nil? target_file = yaml_filename_users( pathname_dir: dir, task: mode ) - # target_dir = yaml_targetdir_users( pathname_dir: dir, task: mode ) - # Dir.mkdir( target_dir ) unless Dir.exist? target_dir open( target_file, 'w' ) do |out2| yaml_populate_users( out: out2, target_filename: target_file ) end @@ -434,7 +432,7 @@ def yaml_populate_work( curation_concern:, curation_concern = yaml_work_find( curation_concern: curation_concern ) if curation_concern.is_a? String target_file = yaml_filename_work( pathname_dir: dir, work: curation_concern ) target_dir = yaml_targetdir_work( pathname_dir: dir, work: curation_concern ) - Dir.mkdir( target_dir ) unless Dir.exist? target_dir + Dir.mkdir( target_dir ) if export_files && !Dir.exist?( target_dir ) open( target_file, 'w' ) do |out2| yaml_populate_work( curation_concern: curation_concern, out: out2, diff --git a/lib/tasks/yaml_populate.rb b/lib/tasks/yaml_populate.rb index 568e16c1..9cc5b2ae 100644 --- a/lib/tasks/yaml_populate.rb +++ b/lib/tasks/yaml_populate.rb @@ -76,6 +76,27 @@ def report_work( first_id:, measurements:, total: nil ) TaskHelper.benchmark_report( label: 'work id', first_id: first_id, measurements: measurements, total: total ) end + def run_all + total = nil + measurements = [] + curation_concerns = if 'work' == @populate_type + TaskHelper.all_works + else + Collection.all + end + curation_concerns.each do |cc| + @ids << cc.id + subtotal = run_one_curation_concern( curation_concern: cc ) + measurements << subtotal + if total.nil? + total = subtotal + else + total += subtotal + end + end + return measurements, total + end + def run_multiple( ids: ) total = nil measurements = [] @@ -102,28 +123,39 @@ def run_one( id: ) return measurement end - def yaml_populate_collection( id: ) + def run_one_curation_concern( curation_concern: ) + measurement = Benchmark.measure( curation_concern.id ) do + if 'work' == @populate_type + yaml_populate_work( id: curation_concern.id, work: curation_concern ) + else + yaml_populate_collection( id: curation_concern.id, collection: curation_concern ) + end + end + return measurement + end + + def yaml_populate_collection( id:, collection: nil ) puts "Exporting collection #{id} to '#{@target_dir}' with export files flag set to #{@export_files} and mode #{@mode}" service = YamlPopulateService.new( mode: @mode ) - service.yaml_populate_collection( collection: id, dir: @target_dir, export_files: @export_files ) + if collection.nil? + service.yaml_populate_collection( collection: id, dir: @target_dir, export_files: @export_files ) + else + service.yaml_populate_collection( collection: collection, dir: @target_dir, export_files: @export_files ) + end @populate_ids << id @populate_stats << service.yaml_populate_stats - # Deepblue::MetadataHelper.yaml_populate_collection( collection: id, - # dir: @target_dir, - # export_files: @export_files, - # mode: @mode ) end - def yaml_populate_work( id: ) + def yaml_populate_work( id:, work: nil ) puts "Exporting work #{id} to '#{@target_dir}' with export files flag set to #{@export_files} and mode #{@mode}" service = YamlPopulateService.new( mode: @mode ) - service.yaml_populate_work( curation_concern: id, dir: @target_dir, export_files: @export_files ) + if work.nil? + service.yaml_populate_work( curation_concern: id, dir: @target_dir, export_files: @export_files ) + else + service.yaml_populate_work( curation_concern: work, dir: @target_dir, export_files: @export_files ) + end @populate_ids << id @populate_stats << service.yaml_populate_stats - # Deepblue::MetadataHelper.yaml_populate_work( curation_concern: id, - # dir: @target_dir, - # export_files: @export_files, - # mode: @mode ) end end diff --git a/lib/tasks/yaml_populate_for_collection.rake b/lib/tasks/yaml_populate_for_collection.rake index dc515919..89c5b88f 100644 --- a/lib/tasks/yaml_populate_for_collection.rake +++ b/lib/tasks/yaml_populate_for_collection.rake @@ -13,7 +13,7 @@ namespace :deepblue do task.run end - # bundle exec rake umrdr:yaml_populate_from_multiple_collections['f4752g72m f4752g72m',/deepbluedata-prep,true] + # bundle exec rake deepblue:yaml_populate_from_multiple_collections['f4752g72m f4752g72m',/deepbluedata-prep,true] desc 'Yaml populate from multiple collections (ids separated by spaces)' task :yaml_populate_from_multiple_collections, %i[ ids options ] => :environment do |_task, args| args.with_defaults( options: '{}' ) @@ -21,6 +21,14 @@ namespace :deepblue do task.run end + # bundle exec rake deepblue:yaml_populate_from_all_collections['{"target_dir":"/deepbluedata-prep"\,"export_files":false\,"mode":"build"}'] + desc 'Yaml populate from all collections' + task :yaml_populate_from_all_collections, %i[ options ] => :environment do |_task, args| + args.with_defaults( options: '{}' ) + task = Umrdr::YamlPopulateFromAllCollections.new( options: args[:options] ) + task.run + end + end module Umrdr @@ -33,6 +41,24 @@ module Umrdr require 'benchmark' include Benchmark + class YamlPopulateFromAllCollections < Umrdr::YamlPopulate + + def initialize( options: ) + super( populate_type: 'collection', options: options ) + @export_files = task_options_value( key: 'export_files', default_value: false ) + @ids = [] + end + + def run + @ids = [] + measurements, total = run_all + return if @ids.empty? + report_stats + report_collection( first_id: @ids[0], measurements: measurements, total: total ) + end + + end + class YamlPopulateFromCollection < Umrdr::YamlPopulate def initialize( id:, options: ) diff --git a/lib/tasks/yaml_populate_for_work.rake b/lib/tasks/yaml_populate_for_work.rake index 2b25673a..a78d21eb 100644 --- a/lib/tasks/yaml_populate_for_work.rake +++ b/lib/tasks/yaml_populate_for_work.rake @@ -21,6 +21,14 @@ namespace :deepblue do task.run end + # bundle exec rake deepblue:yaml_populate_from_all_works['{"target_dir":"/deepbluedata-prep"\,"export_files":false\,"mode":"build"}'] + desc 'Yaml populate from all works' + task :yaml_populate_from_all_works, %i[ options ] => :environment do |_task, args| + args.with_defaults( options: '{}' ) + task = Umrdr::YamlPopulateFromAllWorks.new( options: args[:options] ) + task.run + end + end module Umrdr @@ -33,6 +41,24 @@ module Umrdr require 'benchmark' include Benchmark + class YamlPopulateFromAllWorks < Umrdr::YamlPopulate + + def initialize( options: ) + super( populate_type: 'work', options: options ) + @export_files = task_options_value( key: 'export_files', default_value: false ) + @ids = [] + end + + def run + @ids = [] + measurements, total = run_all + return if @ids.empty? + report_stats + report_work( first_id: @ids[0], measurements: measurements, total: total ) + end + + end + class YamlPopulateFromWork < Umrdr::YamlPopulate def initialize( id:, options: )