From df13ff9a1151bdcfc5ec1840ca1df89ed8d7d652 Mon Sep 17 00:00:00 2001 From: Mathieu Leplatre Date: Wed, 16 Oct 2024 10:02:14 +0200 Subject: [PATCH] Drop option that gzips files server side (#724) * Drop option that gzips files server side * Fixing lint error * Removed code that no longer applies with gzip removed --------- Co-authored-by: Alex Cottner --- README.rst | 60 -------------------------------- scripts/download.py | 21 ++--------- scripts/upload.py | 7 +--- src/kinto_attachment/__init__.py | 6 ++-- src/kinto_attachment/utils.py | 33 ------------------ tests/config/local_gzipped.ini | 16 --------- tests/config/s3_per_resource.ini | 4 +-- tests/test_plugin_setup.py | 9 ++--- tests/test_utils.py | 31 ----------------- tests/test_views_attachment.py | 29 ++++----------- 10 files changed, 21 insertions(+), 195 deletions(-) delete mode 100644 tests/config/local_gzipped.ini diff --git a/README.rst b/README.rst index 0de32de..fdbce93 100644 --- a/README.rst +++ b/README.rst @@ -120,29 +120,6 @@ Or a specific collection: kinto.attachment.resources.blog.articles.keep_old_files = true - -The ``gzipped`` option ----------------------- - -If you want uploaded files to get gzipped when stored (default: False): - -.. code-block:: ini - - kinto.attachment.gzipped = true - -Or only for a particular bucket: - -.. code-block:: ini - - kinto.attachment.resources.blog.gzipped = true - -Or a specific collection: - -.. code-block:: ini - - kinto.attachment.resources.blog.articles.gzipped = true - - The ``randomize`` option ------------------------ @@ -280,39 +257,6 @@ with the following fields: } -If the file is gzipped by the server, an ``original`` key is added in the ``attachment`` -key, containing the file info **before** it's gzipped. The ``attachment`` keys are -in that case referring to the gzipped file: - - -.. code-block:: json - - { - "data": { - "attachment": { - "filename": "IMG_20150219_174559.jpg.gz", - "hash": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", - "location": "http://cdn.service.org/files/ffa9c7b9-7561-406b-b7f9-e00ac94644ff.jpg.gz", - "mimetype": "application/x-gzip", - "size": 14818, - "original": { - "filename": "IMG_20150219_174559.jpg", - "hash": "hPME6i9avCf/LFaznYr+sHtwQEX7mXYHSu+vgtygpM8=", - "mimetype": "image/jpeg", - "size": 1481798 - } - }, - "id": "c2ce1975-0e52-4b2f-a5db-80166aeca688", - "last_modified": 1447834938251, - "theme": "orange", - "type": "wallpaper" - }, - "permissions": { - "write": ["basicauth:6de355038fd943a2dc91405063b91018bb5dd97a08d1beb95713d23c2909748f"] - } - } - - Usage ===== @@ -418,10 +362,6 @@ bucket and collection:: $ python3 scripts/upload.py --server=$SERVER --bucket=$BUCKET --collection=$COLLECTION --auth "token:mysecret" README.rst pictures/* -If the ``--gzip`` option is passed, the files are gzipped before upload. -Since the ``attachment`` attribute contains metadata of the compressed file -the original file metadata are stored in a ``original`` attribute. - See ``python3 scripts/upload.py --help`` for more details about options. diff --git a/scripts/download.py b/scripts/download.py index 20e88f0..ba47a63 100644 --- a/scripts/download.py +++ b/scripts/download.py @@ -1,4 +1,3 @@ -import gzip import hashlib import os @@ -19,14 +18,8 @@ def download_files(client, records, folder, chunk_size=1024): continue attachment = record['attachment'] - # Check if file was Gzipped during upload (see `upload.py`) - is_gzip = 'original' in attachment - if is_gzip: - filename = attachment['original']['filename'] - remote_hash = attachment['original']['hash'] - else: - filename = attachment['filename'] - remote_hash = attachment['hash'] + filename = attachment['filename'] + remote_hash = attachment['hash'] destination = os.path.join(folder, filename) @@ -45,15 +38,7 @@ def download_files(client, records, folder, chunk_size=1024): for chunk in resp.iter_content(chunk_size=chunk_size): if chunk: f.write(chunk) - - # Decompress the file if necessary. - if is_gzip: - with open(destination, 'wb') as f: - data = open(tmp_file, 'rb').read() - f.write(gzip.decompress(data)) - else: - os.rename(tmp_file, destination) - + os.rename(tmp_file, destination) print('Downloaded "%s"' % filename) diff --git a/scripts/upload.py b/scripts/upload.py index 954a82e..eaeb399 100644 --- a/scripts/upload.py +++ b/scripts/upload.py @@ -37,12 +37,7 @@ def files_to_upload(records, files, force=False): records_by_id.pop(record['id'], None) local_hash = sha256(open(filepath, 'rb').read()) - # If file was uploaded gzipped, compare with hash of - # uncompressed file. - remote_hash = record.get('original', {}).get('hash') - if not remote_hash: - remote_hash = record['attachment']['hash'] - + remote_hash = record['attachment']['hash'] # If hash has changed, upload ! if local_hash != remote_hash or force: print("File '%s' has changed." % filename) diff --git a/src/kinto_attachment/__init__.py b/src/kinto_attachment/__init__.py index 5f577cb..01074ca 100644 --- a/src/kinto_attachment/__init__.py +++ b/src/kinto_attachment/__init__.py @@ -21,8 +21,8 @@ def includeme(config): if setting_name.startswith("attachment.resources."): # Resource specific config parts = setting_name.replace("attachment.resources.", "").split(".") - # attachment.resources.{bid}.gzipped - # attachment.resources.{bid}.{cid}.gzipped + # attachment.resources.{bid}.randomize + # attachment.resources.{bid}.{cid}.randomize if len(parts) == 3: bucket_id, collection_id, name = parts resource_id = "/buckets/{}/collections/{}".format(bucket_id, collection_id) @@ -33,7 +33,7 @@ def includeme(config): message = "Configuration rule malformed: `{}`".format(setting_name) raise ConfigurationError(message) - if name in ("gzipped", "randomize", "keep_old_files"): + if name in ("randomize", "keep_old_files"): config.registry.attachment_resources[resource_id][name] = asbool(setting_value) else: message = "`{}` is not a supported setting name. Read `{}`".format( diff --git a/src/kinto_attachment/utils.py b/src/kinto_attachment/utils.py index 599c855..edba264 100644 --- a/src/kinto_attachment/utils.py +++ b/src/kinto_attachment/utils.py @@ -1,9 +1,7 @@ import cgi -import gzip import hashlib import json import os -from io import BytesIO from kinto.authorization import RouteFactory from kinto.core import utils as core_utils @@ -144,7 +142,6 @@ def delete_attachment(request, link_field=None, uri=None, keep_old_files=False): def save_file(request, content, folder=None, keep_link=True, replace=False): - gzipped = setting_value(request, "gzipped", default=False) randomize = setting_value(request, "randomize", default=True) overriden_mimetypes = {**DEFAULT_MIMETYPES} @@ -167,7 +164,6 @@ def save_file(request, content, folder=None, keep_link=True, replace=False): _, extension = os.path.splitext(filename) mimetype = overriden_mimetypes.get(extension, content.type) - original = None save_options = { "folder": folder, "randomize": randomize, @@ -175,33 +171,6 @@ def save_file(request, content, folder=None, keep_link=True, replace=False): "headers": {"Content-Type": mimetype}, } - if gzipped: - original = { - "filename": filename, - "hash": filehash, - "mimetype": mimetype, - "size": size, - } - mimetype = "application/x-gzip" - filename += ".gz" - content.filename = filename - save_options["extensions"] = ["gz"] - save_options["headers"]["Content-Type"] = mimetype - - # in-memory gzipping - out = BytesIO() - with gzip.GzipFile(fileobj=out, mode="w") as f: - f.write(filecontent) - - filecontent = out.getvalue() - out.seek(0) - content.file = out - - # We give the hash and size of the gzip content in the attachment - # metadata. - filehash = sha256(filecontent) - size = len(filecontent) - try: location = request.attachment.save(content, **save_options) except FileNotAllowed: @@ -217,8 +186,6 @@ def save_file(request, content, folder=None, keep_link=True, replace=False): "mimetype": mimetype, "size": size, } - if original is not None: - attachment["original"] = original if keep_link: # Store link between record and attachment (for later deletion). diff --git a/tests/config/local_gzipped.ini b/tests/config/local_gzipped.ini deleted file mode 100644 index 6173f35..0000000 --- a/tests/config/local_gzipped.ini +++ /dev/null @@ -1,16 +0,0 @@ -[app:main] -use = egg:kinto -kinto.userid_hmac_secret = some-secret-string -multiauth.policies = basicauth -kinto.experimental_collection_schema_validation = true - -kinto.includes = kinto.plugins.default_bucket kinto_attachment - -kinto.attachment.base_path = /tmp -kinto.attachment.base_url = https://cdn.firefox.net/ -kinto.attachment.extra.base_url = https://files.server.com/root/ -kinto.attachment.gzipped = true - -kinto.attachment.folder = {bucket_id}/{collection_id} - -kinto.attachment.extensions = default diff --git a/tests/config/s3_per_resource.ini b/tests/config/s3_per_resource.ini index 360982b..ab47d7b 100644 --- a/tests/config/s3_per_resource.ini +++ b/tests/config/s3_per_resource.ini @@ -18,5 +18,5 @@ kinto.attachment.aws.access_key = aws kinto.attachment.aws.secret_key = aws kinto.attachment.aws.bucket_name = myfiles -kinto.attachment.resources.fennec.gzipped = true -kinto.attachment.resources.fennec.experiments.gzipped = false +kinto.attachment.resources.fennec.randomize = true +kinto.attachment.resources.fennec.experiments.randomize = false diff --git a/tests/test_plugin_setup.py b/tests/test_plugin_setup.py index 0c4a9e2..82cae3f 100644 --- a/tests/test_plugin_setup.py +++ b/tests/test_plugin_setup.py @@ -2,13 +2,14 @@ import pytest from kinto import main as kinto_main -from kinto_attachment import __version__, includeme from pyramid import testing from pyramid.exceptions import ConfigurationError from pyramid_storage.gcloud import GoogleCloudStorage from pyramid_storage.interfaces import IFileStorage from pyramid_storage.s3 import S3FileStorage +from kinto_attachment import __version__, includeme + from . import BaseWebTestLocal @@ -37,7 +38,7 @@ def test_includeme_understand_authorized_resources_settings(self): config = self.includeme( settings={ "attachment.base_path": "/tmp", - "attachment.resources.fennec.gzipped": "true", + "attachment.resources.fennec.keep_old_files": "true", "attachment.resources.fingerprinting.fonts.randomize": "true", } ) @@ -47,9 +48,9 @@ def test_includeme_understand_authorized_resources_settings(self): def test_includeme_raises_error_for_malformed_resource_settings(self): with pytest.raises(ConfigurationError) as excinfo: - self.includeme(settings={"attachment.resources.fen.nec.fonts.gzipped": "true"}) + self.includeme(settings={"attachment.resources.fen.nec.fonts.keep_old_files": "true"}) assert str(excinfo.value) == ( - "Configuration rule malformed: `attachment.resources.fen.nec.fonts.gzipped`" + "Configuration rule malformed: `attachment.resources.fen.nec.fonts.keep_old_files`" ) def test_includeme_raises_error_if_wrong_resource_settings_is_defined(self): diff --git a/tests/test_utils.py b/tests/test_utils.py index 494335e..1375162 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,10 +1,3 @@ -import cgi -import unittest -from io import BytesIO - -from kinto_attachment.utils import save_file - - class _Registry(object): settings = {"attachment.folder": ""} attachment_resources = {} @@ -31,27 +24,3 @@ class _Request(object): def route_path(self, *args, **kw): return "fullpath" - - -class TestUtils(unittest.TestCase): - def test_save_file_gzip(self): - my_font = cgi.FieldStorage() - my_font.filename = "font.ttf" - my_font.file = BytesIO(b"content") - my_font.type = "application/x-font" - - request = _Request() - request.registry.settings["attachment.gzipped"] = True - res = save_file(request, my_font) - self.assertTrue("original" in res) - - def test_save_file_not_gzip(self): - my_font = cgi.FieldStorage() - my_font.filename = "font.ttf" - my_font.file = BytesIO(b"content") - my_font.type = "application/x-font" - - request = _Request() - request.registry.settings["attachment.gzipped"] = False - res = save_file(request, my_font) - self.assertFalse("original" in res) diff --git a/tests/test_views_attachment.py b/tests/test_views_attachment.py index 09ba644..06d93d2 100644 --- a/tests/test_views_attachment.py +++ b/tests/test_views_attachment.py @@ -4,8 +4,8 @@ from unittest import mock from urllib.parse import urlparse -import requests from kinto.core.errors import ERRORS + from kinto_attachment.utils import sha256 from . import BaseWebTestLocal, BaseWebTestS3, get_user_headers @@ -358,38 +358,23 @@ def test_upload_replace_accepted_if_write_allowed(self): self.upload(status=200) -class ZippedAttachementViewTest(BaseWebTestLocal, unittest.TestCase): - config = "config/local_gzipped.ini" - - def test_file_get_zipped_if_configured(self): - r = self.upload() - self.assertEqual(r.json["mimetype"], "application/x-gzip") - self.assertEqual(r.json["filename"], "image.jpg.gz") - - class PerResourceConfigAttachementViewTest(BaseWebTestS3, unittest.TestCase): config = "config/s3_per_resource.ini" - def test_file_get_zipped_in_fennec_bucket(self): + def test_file_get_randomize_in_fennec_bucket(self): r = self.upload() - self.assertEqual(r.json["original"]["mimetype"], "image/jpeg") - self.assertEqual(r.json["mimetype"], "application/x-gzip") - self.assertEqual(r.json["filename"], "image.jpg.gz") - - relative_url = r.json["location"].replace(self.base_url, "") - resp = requests.get("http://localhost:6000/myfiles/{}".format(relative_url)) - self.assertEqual(resp.headers["Content-Type"], "application/x-gzip") - self.assertNotIn("Content-Encoding", resp.headers) + self.assertEqual(r.json["filename"], "image.jpg") + self.assertNotIn(r.json["filename"], r.json["location"]) - def test_file_do_not_get_zipped_in_fennec_experiments_collection(self): + def test_file_do_not_get_randomize_in_fennec_experiments_collection(self): self.create_collection("fennec", "experiments") record_uri = self.get_record_uri("fennec", "experiments", str(uuid.uuid4())) self.endpoint_uri = record_uri + "/attachment" r = self.upload() - self.assertNotIn("original", r.json["mimetype"]) - self.assertEqual(r.json["mimetype"], "image/jpeg") + self.assertEqual(r.json["filename"], "image.jpg") + self.assertIn(r.json["filename"], r.json["location"]) class OverridenMimetypesTest(BaseWebTestS3, unittest.TestCase):