diff --git a/Makefile b/Makefile index 310c603..27cb0fb 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ collectstatic: run.jarbas seed: run.jarbas docker-compose run --rm jarbas python manage.py loaddatasets - docker-compose run --rm jarbas python manage.py loadsuppliers + docker-compose run --rm jarbas python manage.py reimbursements /tmp/serenata-data/reimbursements.xz + docker-compose run --rm jarbas python manage.py companies /tmp/serenata-data/2016-09-03-companies.xz + docker-compose run --rm jarbas python manage.py irregularities /tmp/serenata-data/irregularities.xz run.devel: collectstatic diff --git a/README.md b/README.md index 1b85a1c..8b90e79 100644 --- a/README.md +++ b/README.md @@ -132,42 +132,45 @@ If you have [Docker](https://docs.docker.com/engine/installation/) (with [Docker ```console $ docker-compose up -d --build -$ docker-compose run --rm jarbas python manage.py migrate -$ docker-compose run --rm jarbas python manage.py ceapdatasets +$ docker-compose run --rm jarbas python manage.py migrate +$ docker-compose run --rm jarbas python manage.py ceapdatasets ``` - You can access it at [`localhost:80`](http://localhost:80/). However your database starts empty and you still have to collect your static files: ```console $ docker-compose run --rm jarbas python manage.py collectstatic --no-input $ docker-compose run --rm jarbas python manage.py loaddatasets -$ docker-compose run --rm jarbas python manage.py reimbursements -$ docker-compose run --rm jarbas python manage.py loadsupliers -$ python manage.py irregularities +$ docker-compose run --rm jarbas python manage.py reimbursements <path to reimbursements.xz> +$ docker-compose run --rm jarbas python manage.py irregularities <path to irregularities.xz> +$ docker-compose run --rm jarbas python manage.py companies <path to companies.xz> ``` -There are some cleaver shortcuts in the `Makefile` if you like it. +You can get the datasets by running [Rosie](https://github.com/datasciencebr/rosie) or directly with the [toolbox](https://github.com/datasciencebr/serenata-toolbox). 
+ +Also there are some clever shortcuts in the `Makefile` if you like it. ### Local install #### Requirements -The app is based in [Python 3.5](http://python.org) and [Node.js 6](http://nodejs.org). Once you have `pip` and `npm` available, install the dependencies: +Jarbas requires [Python 3.5](http://python.org), [Node.js 6](http://nodejs.org), and [PostgreSQL 9.4+](https://www.postgresql.org). + +Once you have `pip` and `npm` available, install the dependencies: ```console -npm i +npm install python -m pip install -r requirements.txt ``` -Minor details on requirements: +##### Python's `lzma` module + +In some Linux distros `lzma` is not installed by default. You can check whether you have it or not with `$ python -m lzma`. In Debian-based systems you can fix that with `$ apt-get install liblzma-dev` or in macOS with `$ brew install xz` — but you might have to re-compile your Python. -* **`lzma`**: In some Linux distros `lzma` is not installed by default. You can check whether you have it or not with `$ python -m lzma`. In Debian based systems you can fix that with `$ apt-get install liblzma-dev` but you mihght have to re-compile your Python. Some macOS Users might have the same problem. To check if you have `lzma` you can use `$ python -m lmza`. To fix it you need to install `lzma` using `$ brew install xz` and after that you need to recompile Python, and an way to do it is through `$ brew upgrade --cleanup python`. -* **`psycopg2`**: The `requirements.txt` file is prepared to use [PostgresSQL](https://www.postgresql.org) and `psycopg2` might fail if you don't have Postgres installed locally. #### Settings -Copy `contrib/.env.sample` as `.env` in the project's root folder and adjust your settings. These are the main environment settings: +Copy `contrib/.env.sample` as `.env` in the project's root folder and adjust your settings. 
These are the main variables: ##### Django settings @@ -187,8 +190,6 @@ Copy `contrib/.env.sample` as `.env` in the project's root folder and adjust you * `AMAZON_S3_BUCKET` (_str_) Name of the Amazon S3 bucket to look for datasets (e.g. `serenata-de-amor-data`) * `AMAZON_S3_REGION` (_str_) Region of the Amazon S3 (e.g. `s3-sa-east-1`) * `AMAZON_S3_DATASET_DATE` (_str_) Datasets file name prefix of CEAP datasets from Serenata de Amor (e.g. `2016-08-08` for `2016-08-08-current-year.xz`) -* `AMAZON_S3_REIMBURSEMENTS_DATE` (_str_) Reumbursements dataset file name date prefix (e.g. `2016-12-06` for `2016-12-06-reimbursements.xz`) -* `AMAZON_S3_COMPANIES_DATE` (_str_) Suppliers (companies) datasets file name date prefix (e.g. `2016-08-08` for `2016-08-08-companies.xz`) * `AMAZON_S3_CEAPTRANSLATION_DATE` (_str_) File name prefix for dataset guide (e.g. `2016-08-08` for `2016-08-08-ceap-datasets.md`) ##### Google settings @@ -210,19 +211,13 @@ Now you can load the data from our datasets and get some other data as static fi ``` $ python manage.py loaddatasets -$ python manage.py loadsuppliers -$ python manage.py reimbursements +$ python manage.py reimbursements <path to reimbursements.xz> +$ python manage.py irregularities <path to irregularities.xz> +$ python manage.py companies <path to companies.xz> $ python manage.py ceapdatasets ``` -Use `python manage.py loaddatasets --help` and `python manage.py loadsuppliers --help` to check options on limiting the number of documents to be loaded from the datasets. - -If [Rosie](https://github.com/datasciencebr/rosie) was kind enough to give you -a `irregularities.xz`, you can load it with: - -``` -$ python manage.py irregularities -``` +You can get the datasets by running [Rosie](https://github.com/datasciencebr/rosie) or directly with the [toolbox](https://github.com/datasciencebr/serenata-toolbox). 
#### Generate static files diff --git a/contrib/.env.sample b/contrib/.env.sample index 44e84a0..daf4ed6 100644 --- a/contrib/.env.sample +++ b/contrib/.env.sample @@ -9,7 +9,5 @@ AMAZON_S3_REGION=s3-sa-east-1 AMAZON_S3_CEAPTRANSLATION_DATE=2016-08-08 AMAZON_S3_DATASET_DATE=2016-08-08 -AMAZON_S3_REIMBURSEMENTS_DATE=2016-12-06 -AMAZON_S3_SUPPLIERS_DATE=2016-09-03 GOOGLE_STREET_VIEW_API_KEY=my-google-places-api-key diff --git a/jarbas/core/management/commands/__init__.py b/jarbas/core/management/commands/__init__.py index c268740..0177a4a 100644 --- a/jarbas/core/management/commands/__init__.py +++ b/jarbas/core/management/commands/__init__.py @@ -10,53 +10,13 @@ class LoadCommand(BaseCommand): - def add_arguments(self, parser): - parser.add_argument( - '--source', '-s', dest='source', default=None, - help='Data directory of Serenata de Amor (dataset source)' - ) - parser.add_argument( - '--drop-all', '-d', dest='drop', action='store_true', - help='Drop all existing records before loading the datasets' - ) - parser.add_argument( - '--dataset-version', dest='dataset_version', default=None, - help='Dataset file version (usualy a YYYY-MM-DD date)' - ) - - def get_dataset(self, name): - if self.source: - return self.load_local(self.source, name) - return self.load_remote(name) - - def load_remote(self, name): - """Load a document from Amazon S3""" - url = self.get_url(name) - print("Loading " + url) - with NamedTemporaryFile(delete=False) as tmp: - urlretrieve(url, filename=tmp.name) - return tmp.name - - def load_local(self, source, name): - """Load documents from local source""" - path = self.get_path(source, name) - - if not os.path.exists(path): - print(path + " not found") - return None - - print("Loading " + path) - return path - - def get_url(self, suffix): - return 'https://{region}.amazonaws.com/{bucket}/{file_name}'.format( - region=settings.AMAZON_S3_REGION, - bucket=settings.AMAZON_S3_BUCKET, - file_name=self.get_file_name(suffix) - ) - - def get_path(self, 
source, name): - return os.path.join(source, self.get_file_name(name)) + def add_arguments(self, parser, add_drop_all=True): + parser.add_argument('dataset', help='Path to the .xz dataset') + if add_drop_all: + parser.add_argument( + '--drop-all', '-d', dest='drop', action='store_true', + help='Drop all existing records before loading the datasets' + ) @staticmethod def to_number(value, cast=None): @@ -91,12 +51,6 @@ def to_date(text): except ValueError: return None - def get_file_name(self, name): - if not self.date: - settings_name = 'AMAZON_S3_{}_DATE'.format(name.upper()) - self.date = getattr(settings, settings_name) - return '{date}-{name}.xz'.format(date=self.date, name=name) - def drop_all(self, model): if model.objects.count() != 0: msg = 'Deleting all existing records from {} model' @@ -115,3 +69,61 @@ def print_count(self, model, **kwargs): @staticmethod def get_model_name(model): return model._meta.label.split('.')[-1] + + +class OldLoadCommand(LoadCommand): + + def add_arguments(self, parser): + parser.add_argument( + '--drop-all', '-d', dest='drop', action='store_true', + help='Drop all existing records before loading the datasets' + ) + parser.add_argument( + '--source', '-s', dest='source', default=None, + help='Data directory of Serenata de Amor (dataset source)' + ) + parser.add_argument( + '--dataset-version', dest='dataset_version', default=None, + help='Dataset file version (usualy a YYYY-MM-DD date)' + ) + + def get_dataset(self, name): + if self.source: + return self.load_local(self.source, name) + return self.load_remote(name) + + def load_remote(self, name): + """Load a document from Amazon S3""" + url = self.get_url(name) + print("Loading " + url) + with NamedTemporaryFile(delete=False) as tmp: + urlretrieve(url, filename=tmp.name) + return tmp.name + + def load_local(self, source, name): + """Load documents from local source""" + path = self.get_path(source, name) + + if not os.path.exists(path): + print(path + " not found") + return None 
+ + print("Loading " + path) + return path + + def get_url(self, suffix): + return 'https://{region}.amazonaws.com/{bucket}/{file_name}'.format( + region=settings.AMAZON_S3_REGION, + bucket=settings.AMAZON_S3_BUCKET, + file_name=self.get_file_name(suffix) + ) + + def get_path(self, source, name): + return os.path.join(source, self.get_file_name(name)) + + def get_file_name(self, name): + if not self.date: + settings_name = 'AMAZON_S3_{}_DATE'.format(name.upper()) + self.date = getattr(settings, settings_name) + return '{date}-{name}.xz'.format(date=self.date, name=name) + diff --git a/jarbas/core/management/commands/loadsuppliers.py b/jarbas/core/management/commands/companies.py similarity index 90% rename from jarbas/core/management/commands/loadsuppliers.py rename to jarbas/core/management/commands/companies.py index 2300129..6956f9b 100644 --- a/jarbas/core/management/commands/loadsuppliers.py +++ b/jarbas/core/management/commands/companies.py @@ -12,10 +12,8 @@ class Command(LoadCommand): help = 'Load Serenata de Amor supplier dataset into the database' def handle(self, *args, **options): - self.date = options.get('dataset_version') - self.source = options.get('source') + self.path = options['dataset'] self.count = self.print_count(Supplier) - print('self.cont =', self.count) print('Starting with {:,} suppliers'.format(self.count)) if options.get('drop', False): @@ -23,16 +21,16 @@ def handle(self, *args, **options): self.drop_all(Activity) self.count = 0 - self.save_suppliers(self.get_dataset('companies')) + self.save_suppliers() - def save_suppliers(self, dataset): + def save_suppliers(self): """ Receives path to the dataset file and create a Supplier object for each row of each file. It creates the related activity when needed. 
""" skip = ('main_activity', 'secondary_activty') keys = list(f.name for f in Supplier._meta.fields if f not in skip) - with lzma.open(dataset, mode='rt') as file_handler: + with lzma.open(self.path, mode='rt') as file_handler: for row in csv.DictReader(file_handler): main, secondary = self.save_activities(row) diff --git a/jarbas/core/management/commands/irregularities.py b/jarbas/core/management/commands/irregularities.py index f1a4f9c..d1dab84 100644 --- a/jarbas/core/management/commands/irregularities.py +++ b/jarbas/core/management/commands/irregularities.py @@ -12,14 +12,10 @@ class Command(LoadCommand): filter_keys = ('applicant_id', 'document_id', 'year') def add_arguments(self, parser): - parser.add_argument( - '--irregularities', '-i', dest='irregularities_path', - default='irregularities.xz', - help='Path to the irregularities.xz dataset' - ) + super().add_arguments(parser, add_drop_all=False) def handle(self, *args, **options): - self.path = options.get('irregularities_path', 'irregularities.xz') + self.path = options['dataset'] if not os.path.exists(self.path): raise FileNotFoundError(os.path.abspath(self.path)) diff --git a/jarbas/core/management/commands/loaddatasets.py b/jarbas/core/management/commands/loaddatasets.py index c9cb227..054a44d 100644 --- a/jarbas/core/management/commands/loaddatasets.py +++ b/jarbas/core/management/commands/loaddatasets.py @@ -5,11 +5,11 @@ from django.conf import settings -from jarbas.core.management.commands import LoadCommand +from jarbas.core.management.commands import OldLoadCommand from jarbas.core.models import Document -class Command(LoadCommand): +class Command(OldLoadCommand): help = 'Load Serenata de Amor datasets into the database' suffixes = ('current-year', 'last-year', 'previous-years') diff --git a/jarbas/core/management/commands/reimbursements.py b/jarbas/core/management/commands/reimbursements.py index e38534f..620fb96 100644 --- a/jarbas/core/management/commands/reimbursements.py +++ 
b/jarbas/core/management/commands/reimbursements.py @@ -16,8 +16,7 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - self.date = options.get('dataset_version') - self.source = options.get('source') + self.path = options['dataset'] self.count = Reimbursement.objects.count() print('Starting with {:,} reimbursements'.format(self.count)) @@ -31,8 +30,7 @@ def handle(self, *args, **options): @property def reimbursements(self): """Returns a Generator with a Reimbursement object for each row.""" - dataset = self.get_dataset('reimbursements') - with lzma.open(dataset, mode='rt') as file_handler: + with lzma.open(self.path, mode='rt') as file_handler: for row in csv.DictReader(file_handler): yield Reimbursement(**self.serialize(row)) diff --git a/jarbas/core/tests/test_loadsuppliers_command.py b/jarbas/core/tests/test_companies_command.py similarity index 65% rename from jarbas/core/tests/test_loadsuppliers_command.py rename to jarbas/core/tests/test_companies_command.py index f6fa4e2..086e18b 100644 --- a/jarbas/core/tests/test_loadsuppliers_command.py +++ b/jarbas/core/tests/test_companies_command.py @@ -4,7 +4,7 @@ from django.test import TestCase -from jarbas.core.management.commands.loadsuppliers import Command +from jarbas.core.management.commands.companies import Command from jarbas.core.models import Activity, Supplier from jarbas.core.tests import sample_supplier_data @@ -61,11 +61,11 @@ def test_save_activities(self, update_or_create): self.assertEqual(1, len(main)) self.assertEqual(99, len(secondaries)) - @patch('jarbas.core.management.commands.loadsuppliers.lzma') - @patch('jarbas.core.management.commands.loadsuppliers.csv.DictReader') - @patch('jarbas.core.management.commands.loadsuppliers.Command.save_activities') - @patch('jarbas.core.management.commands.loadsuppliers.Command.serialize') - @patch('jarbas.core.management.commands.loadsuppliers.Command.print_count') + @patch('jarbas.core.management.commands.companies.lzma') + 
@patch('jarbas.core.management.commands.companies.csv.DictReader') + @patch('jarbas.core.management.commands.companies.Command.save_activities') + @patch('jarbas.core.management.commands.companies.Command.serialize') + @patch('jarbas.core.management.commands.companies.Command.print_count') @patch.object(Supplier.objects, 'create') def test_save_suppliers(self, create, print_count, serialize, save_activities, rows, lzma): self.command.count = 0 @@ -73,7 +73,8 @@ def test_save_suppliers(self, create, print_count, serialize, save_activities, r rows.return_value = [sample_supplier_data] serialize.return_value = dict(ahoy=42) save_activities.return_value = ([3], [14, 15]) - self.command.save_suppliers([]) + self.command.path = 'companies.xz' + self.command.save_suppliers() create.assert_called_with(ahoy=42) create.return_value.main_activity.add.assert_called_with(3) self.assertEqual(2, create.return_value.secondary_activity.add.call_count) @@ -81,30 +82,26 @@ def test_save_suppliers(self, create, print_count, serialize, save_activities, r class TestConventionMethods(TestCommand): - @patch('jarbas.core.management.commands.loadsuppliers.print') - @patch('jarbas.core.management.commands.loadsuppliers.LoadCommand.load_remote') - @patch('jarbas.core.management.commands.loadsuppliers.LoadCommand.drop_all') - @patch('jarbas.core.management.commands.loadsuppliers.Command.save_suppliers') - @patch('jarbas.core.management.commands.loadsuppliers.Command.print_count') - def test_handler_without_options(self, print_count, save_suppliers, drop_all, load_remote, print_): + @patch('jarbas.core.management.commands.companies.print') + @patch('jarbas.core.management.commands.companies.LoadCommand.drop_all') + @patch('jarbas.core.management.commands.companies.Command.save_suppliers') + @patch('jarbas.core.management.commands.companies.Command.print_count') + def test_handler_without_options(self, print_count, save_suppliers, drop_all, print_): print_count.return_value = 0 - 
self.command.handle(source=None) + self.command.handle(dataset='companies.xz') print_.assert_called_with('Starting with 0 suppliers') - self.assertEqual(1, load_remote.call_count) self.assertEqual(1, save_suppliers.call_count) self.assertEqual(1, print_count.call_count) + self.assertEqual('companies.xz', self.command.path) drop_all.assert_not_called() - @patch('jarbas.core.management.commands.loadsuppliers.print') - @patch('jarbas.core.management.commands.loadsuppliers.LoadCommand.load_local') - @patch('jarbas.core.management.commands.loadsuppliers.Command.drop_all') - @patch('jarbas.core.management.commands.loadsuppliers.Command.save_suppliers') - @patch('jarbas.core.management.commands.loadsuppliers.Command.print_count') - def test_handler_with_options(self, print_count, save_suppliers, drop_all, load_local, print_): + @patch('jarbas.core.management.commands.companies.print') + @patch('jarbas.core.management.commands.companies.Command.drop_all') + @patch('jarbas.core.management.commands.companies.Command.save_suppliers') + @patch('jarbas.core.management.commands.companies.Command.print_count') + def test_handler_with_options(self, print_count, save_suppliers, drop_all, print_): print_count.return_value = 0 - self.command.handle(source='ahoy', drop=True, dataset_version='1') + self.command.handle(dataset='companies.xz', drop=True) print_.assert_called_with('Starting with 0 suppliers') self.assertEqual(2, drop_all.call_count) - self.assertEqual(1, load_local.call_count) self.assertEqual(1, save_suppliers.call_count) - self.assertEqual('1', self.command.date) diff --git a/jarbas/core/tests/test_irregularities_command.py b/jarbas/core/tests/test_irregularities_command.py index 8385c42..9f2fe2a 100644 --- a/jarbas/core/tests/test_irregularities_command.py +++ b/jarbas/core/tests/test_irregularities_command.py @@ -84,29 +84,19 @@ class TestConventionMethods(TestCommand): @patch('jarbas.core.management.commands.irregularities.Command.update') 
@patch('jarbas.core.management.commands.irregularities.os.path.exists') @patch('jarbas.core.management.commands.irregularities.print') - def test_handler_without_options(self, print_, exists, update, irregularities): - self.command.handle() + def test_handler(self, print_, exists, update, irregularities): + self.command.handle(dataset='irregularities.xz') update.assert_called_once_with(irregularities) print_.assert_called_once_with('0 reimbursements updated.') self.assertEqual(self.command.path, 'irregularities.xz') - @patch('jarbas.core.management.commands.irregularities.Command.irregularities') - @patch('jarbas.core.management.commands.irregularities.Command.update') - @patch('jarbas.core.management.commands.irregularities.os.path.exists') - @patch('jarbas.core.management.commands.irregularities.print') - def test_handler_with_options(self, print_, exists, update, irregularities): - self.command.handle(irregularities_path='0') - update.assert_called_once_with(irregularities) - print_.assert_called_once_with('0 reimbursements updated.') - self.assertEqual('0', self.command.path) - @patch('jarbas.core.management.commands.irregularities.Command.irregularities') @patch('jarbas.core.management.commands.irregularities.Command.update') @patch('jarbas.core.management.commands.irregularities.os.path.exists') def test_handler_with_non_existing_file(self, exists, update, irregularities): exists.return_value = False with self.assertRaises(FileNotFoundError): - self.command.handle() + self.command.handle(dataset='irregularities.xz') update.assert_not_called() @@ -115,8 +105,7 @@ class TestFileLoader(TestCommand): @patch('jarbas.core.management.commands.irregularities.lzma') @patch('jarbas.core.management.commands.irregularities.csv.DictReader') @patch('jarbas.core.management.commands.irregularities.Command.serialize') - @patch('jarbas.core.management.commands.irregularities.Command.get_dataset') - def test_irregularities_property(self, get_dataset, serialize, rows, lzma): + 
def test_irregularities_property(self, serialize, rows, lzma): lzma.return_value = StringIO() rows.return_value = range(42) self.command.path = 'irregularities.xz' diff --git a/jarbas/core/tests/test_load_command.py b/jarbas/core/tests/test_load_command.py index 4152e88..d2835f4 100644 --- a/jarbas/core/tests/test_load_command.py +++ b/jarbas/core/tests/test_load_command.py @@ -13,12 +13,6 @@ class TestStaticMethods(TestCase): def setUp(self): self.cmd = LoadCommand() - def test_get_file_name(self): - self.cmd.date = None - expected = '1970-01-01-companies.xz' - with self.settings(AMAZON_S3_COMPANIES_DATE='1970-01-01'): - self.assertEqual(expected, self.cmd.get_file_name('companies')) - def test_get_model_name(self): self.assertEqual('Activity', self.cmd.get_model_name(Activity)) @@ -79,66 +73,14 @@ def test_drop_all(self, mock_print): self.assertEqual(0, Activity.objects.count()) -class TestLocalMethods(TestCase): - - def setUp(self): - self.cmd = LoadCommand() - self.source = '/whatever/works' - self.name = 'companies' - - def test_get_path(self): - self.cmd.date = None - expected = '/whatever/works/1970-01-01-companies.xz' - with self.settings(AMAZON_S3_COMPANIES_DATE='1970-01-01'): - result = self.cmd.get_path(self.source, self.name) - self.assertEqual(expected, result) - - @patch('jarbas.core.management.commands.print') - @patch('jarbas.core.management.commands.os.path.exists') - def test_load_local_exists(self, mock_exists, mock_print): - self.cmd.date = None - mock_exists.return_value = True - self.assertIsInstance(self.cmd.load_local(self.source, self.name), str) - - @patch('jarbas.core.management.commands.print') - @patch('jarbas.core.management.commands.os.path.exists') - def test_load_local_fail(self, mock_exists, mock_print): - self.cmd.date = None - mock_exists.return_value = False - self.assertFalse(self.cmd.load_local(self.source, self.name)) - - -class TestRemoteMethods(TestCase): - - def setUp(self): - self.cmd = LoadCommand() - self.name = 
'companies' - self.url = 'https://south.amazonaws.com/jarbas/1970-01-01-companies.xz' - self.custom_settings = { - 'AMAZON_S3_COMPANIES_DATE': '1970-01-01', - 'AMAZON_S3_REGION': 'south', - 'AMAZON_S3_BUCKET': 'jarbas' - } - - def test_get_url(self): - self.cmd.date = None - with self.settings(**self.custom_settings): - result = self.cmd.get_url(self.name) - self.assertEqual(self.url, result) - - @patch('jarbas.core.management.commands.print') - @patch('jarbas.core.management.commands.urlretrieve') - def test_load_remote(self, mock_urlretrieve, mock_print): - self.cmd.date = None - with self.settings(**self.custom_settings): - result = self.cmd.load_remote(self.name) - self.assertEqual(self.url, mock_urlretrieve.call_args[0][0]) - self.assertIsInstance(result, str) - - class TestAddArguments(TestCase): def test_add_arguments(self): mock = Mock() LoadCommand().add_arguments(mock) - self.assertEqual(3, mock.add_argument.call_count) \ No newline at end of file + self.assertEqual(2, mock.add_argument.call_count) + + def test_add_arguments_without_drop_all(self): + mock = Mock() + LoadCommand().add_arguments(mock, add_drop_all=False) + self.assertEqual(1, mock.add_argument.call_count) \ No newline at end of file diff --git a/jarbas/core/tests/test_loaddatasets_command.py b/jarbas/core/tests/test_loaddatasets_command.py index ddbb00c..2c7c450 100644 --- a/jarbas/core/tests/test_loaddatasets_command.py +++ b/jarbas/core/tests/test_loaddatasets_command.py @@ -88,7 +88,7 @@ def test_get_suffix(self): self.assertEqual(self.command.get_suffix(name), 'current-year') self.assertEqual(self.command.get_suffix(''), None) - @patch('jarbas.core.management.commands.loaddatasets.LoadCommand.load_local') + @patch('jarbas.core.management.commands.loaddatasets.OldLoadCommand.load_local') def test_get_load_local(self, super_load_local): list(self.command.load_local('ahoy')) expected = ( @@ -98,7 +98,7 @@ def test_get_load_local(self, super_load_local): ) 
super_load_local.assert_has_calls(expected) - @patch('jarbas.core.management.commands.loaddatasets.LoadCommand.load_remote') + @patch('jarbas.core.management.commands.loaddatasets.OldLoadCommand.load_remote') def test_get_load_remote(self, super_load_remote): list(self.command.load_remote()) expected = ( @@ -168,7 +168,7 @@ def test_handler_with_options(self, drop_all, bulk_create_by, documents_from, lo bulk_create_by.assert_called_once_with((1, 2, 3), 42) self.assertEqual('1', self.command.date) - @patch('jarbas.core.management.commands.loaddatasets.LoadCommand.add_arguments') + @patch('jarbas.core.management.commands.loaddatasets.OldLoadCommand.add_arguments') def test_add_arguments(self, super_add_arguments): parser = MagicMock() self.command.add_arguments(parser) diff --git a/jarbas/core/tests/test_old_load_command.py b/jarbas/core/tests/test_old_load_command.py new file mode 100644 index 0000000..bdb330a --- /dev/null +++ b/jarbas/core/tests/test_old_load_command.py @@ -0,0 +1,101 @@ +from unittest.mock import Mock, patch + +from django.test import TestCase + +from jarbas.core.management.commands import OldLoadCommand + + +class TestFileLoader(TestCase): + + def setUp(self): + self.cmd = OldLoadCommand() + self.cmd.date = '1970-01-01' + + @patch('jarbas.core.management.commands.OldLoadCommand.load_remote') + @patch('jarbas.core.management.commands.print') + def test_get_database(self, print_, load_remote): + self.cmd.source = None + self.cmd.get_dataset('dataset') + load_remote.assert_called_once_with('dataset') + + @patch('jarbas.core.management.commands.os.path.exists') + @patch('jarbas.core.management.commands.print') + def test_get_database_with_source(self, print_, exists): + exists.return_value = True + self.cmd.source = '/whatever/works' + expected = '/whatever/works/1970-01-01-dataset.xz' + self.assertEqual(expected, self.cmd.get_dataset('dataset')) + print_.assert_called_once_with('Loading ' + expected) + + 
@patch('jarbas.core.management.commands.os.path.exists') + @patch('jarbas.core.management.commands.print') + def test_get_database_with_wrong_source(self, print_, exists): + exists.return_value = False + self.cmd.source = '/whatever/works' + expected = '/whatever/works/1970-01-01-dataset.xz' + self.assertIsNone(self.cmd.get_dataset('dataset')) + print_.assert_called_once_with(expected + ' not found') + +class TestLocalMethods(TestCase): + + def setUp(self): + self.cmd = OldLoadCommand() + self.source = '/whatever/works' + self.name = 'dataset' + + def test_get_path(self): + self.cmd.date = None + expected = '/whatever/works/1970-01-01-dataset.xz' + with self.settings(AMAZON_S3_DATASET_DATE='1970-01-01'): + result = self.cmd.get_path(self.source, self.name) + self.assertEqual(expected, result) + + @patch('jarbas.core.management.commands.print') + @patch('jarbas.core.management.commands.os.path.exists') + def test_load_local_exists(self, mock_exists, mock_print): + self.cmd.date = None + mock_exists.return_value = True + self.assertIsInstance(self.cmd.load_local(self.source, self.name), str) + + @patch('jarbas.core.management.commands.print') + @patch('jarbas.core.management.commands.os.path.exists') + def test_load_local_fail(self, mock_exists, mock_print): + self.cmd.date = None + mock_exists.return_value = False + self.assertFalse(self.cmd.load_local(self.source, self.name)) + + +class TestRemoteMethods(TestCase): + + def setUp(self): + self.cmd = OldLoadCommand() + self.name = 'companies' + self.url = 'https://south.amazonaws.com/jarbas/1970-01-01-companies.xz' + self.custom_settings = { + 'AMAZON_S3_COMPANIES_DATE': '1970-01-01', + 'AMAZON_S3_REGION': 'south', + 'AMAZON_S3_BUCKET': 'jarbas' + } + + def test_get_url(self): + self.cmd.date = None + with self.settings(**self.custom_settings): + result = self.cmd.get_url(self.name) + self.assertEqual(self.url, result) + + @patch('jarbas.core.management.commands.print') + 
@patch('jarbas.core.management.commands.urlretrieve') + def test_load_remote(self, mock_urlretrieve, mock_print): + self.cmd.date = None + with self.settings(**self.custom_settings): + result = self.cmd.load_remote(self.name) + self.assertEqual(self.url, mock_urlretrieve.call_args[0][0]) + self.assertIsInstance(result, str) + + +class TestAddArguments(TestCase): + + def test_add_arguments(self): + mock = Mock() + OldLoadCommand().add_arguments(mock) + self.assertEqual(3, mock.add_argument.call_count) \ No newline at end of file diff --git a/jarbas/core/tests/test_reimbursements_command.py b/jarbas/core/tests/test_reimbursements_command.py index f997125..c898fe5 100644 --- a/jarbas/core/tests/test_reimbursements_command.py +++ b/jarbas/core/tests/test_reimbursements_command.py @@ -119,9 +119,10 @@ class TestConventionMethods(TestCommand): @patch('jarbas.core.management.commands.reimbursements.Command.print_count') def test_handler_without_options(self, print_count, bulk_create_by, reimbursements, print_): reimbursements.return_value = (1, 2, 3) - self.command.handle(batch_size=42, source=False) + self.command.handle(dataset='reimbursements.xz', batch_size=42) print_.assert_called_once_with('Starting with 0 reimbursements') bulk_create_by.assert_called_once_with(reimbursements, 42) + self.assertEqual('reimbursements.xz', self.command.path) @patch('jarbas.core.management.commands.reimbursements.print') @patch('jarbas.core.management.commands.reimbursements.Command.reimbursements') @@ -129,11 +130,10 @@ def test_handler_without_options(self, print_count, bulk_create_by, reimbursemen @patch('jarbas.core.management.commands.reimbursements.Command.drop_all') @patch('jarbas.core.management.commands.reimbursements.Command.print_count') def test_handler_with_options(self, print_count, drop_all, bulk_create_by, reimbursements, print_): - self.command.handle(batch_size=1, source='ahoy', drop=True, dataset_version='1') + self.command.handle(dataset='reimbursements.xz', 
batch_size=1, drop=True) print_.assert_called_once_with('Starting with 0 reimbursements') drop_all.assert_called_once_with(Reimbursement) bulk_create_by.assert_called_once_with(reimbursements, 1) - self.assertEqual('1', self.command.date) @patch('jarbas.core.management.commands.reimbursements.LoadCommand.add_arguments') def test_add_arguments(self, super_add_arguments): @@ -149,9 +149,9 @@ class TestFileLoader(TestCommand): @patch('jarbas.core.management.commands.reimbursements.csv.DictReader') @patch('jarbas.core.management.commands.reimbursements.Reimbursement') @patch('jarbas.core.management.commands.reimbursements.Command.serialize') - @patch('jarbas.core.management.commands.reimbursements.Command.get_dataset') - def test_reimbursement_property(self, get_dataset, serializer, reimbursement, row, lzma): + def test_reimbursement_property(self, serializer, reimbursement, row, lzma): lzma.return_value = StringIO() row.return_value = dict(ahoy=42) + self.command.path = 'reimbursements.xz' list(self.command.reimbursements) self.assertEqual(1, reimbursement.call_count) diff --git a/jarbas/settings.py b/jarbas/settings.py index 5e5d9e8..5a42567 100644 --- a/jarbas/settings.py +++ b/jarbas/settings.py @@ -136,10 +136,8 @@ AMAZON_S3_BUCKET = config('AMAZON_S3_BUCKET', default='serenata-de-amor-data') AMAZON_S3_REGION = config('AMAZON_S3_REGIN', default='s3-sa-east-1') + AMAZON_S3_DATASET_DATE = config('AMAZON_S3_DATASET_DATE', default='2016-11-19') -AMAZON_S3_REIMBURSEMENTS_DATE = config('AMAZON_S3_REIMBURSEMENTS_DATE', default='2016-12-06') -AMAZON_S3_IRREGULARITIES_DATE = config('AMAZON_S3_IRREGULARITIES_DATE', default='2016-12-11') -AMAZON_S3_COMPANIES_DATE = config('AMAZON_S3_COMPANIES_DATE', default='2016-09-03') AMAZON_S3_CEAPTRANSLATION_DATE = config('AMAZON_S3_CEAPTRANSLATION_DATE', default='2016-08-08') # Django REST Framework