Skip to content

Commit

Permalink
Drop option that gzips files server side (#724)
Browse files Browse the repository at this point in the history
* Drop option that gzips files server side

* Fixing lint error

* Removed code that no longer applies with gzip removed

---------

Co-authored-by: Alex Cottner <[email protected]>
  • Loading branch information
leplatrem and alexcottner authored Oct 16, 2024
1 parent 18227d1 commit df13ff9
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 195 deletions.
60 changes: 0 additions & 60 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,29 +120,6 @@ Or a specific collection:
kinto.attachment.resources.blog.articles.keep_old_files = true
The ``gzipped`` option
----------------------

If you want uploaded files to get gzipped when stored (default: False):

.. code-block:: ini
kinto.attachment.gzipped = true
Or only for a particular bucket:

.. code-block:: ini
kinto.attachment.resources.blog.gzipped = true
Or a specific collection:

.. code-block:: ini
kinto.attachment.resources.blog.articles.gzipped = true
The ``randomize`` option
------------------------

Expand Down Expand Up @@ -280,39 +257,6 @@ with the following fields:
}
If the file is gzipped by the server, an ``original`` key is added in the ``attachment``
key, containing the file info **before** it's gzipped. The ``attachment`` keys are
in that case referring to the gzipped file:


.. code-block:: json
{
"data": {
"attachment": {
"filename": "IMG_20150219_174559.jpg.gz",
"hash": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad",
"location": "http://cdn.service.org/files/ffa9c7b9-7561-406b-b7f9-e00ac94644ff.jpg.gz",
"mimetype": "application/x-gzip",
"size": 14818,
"original": {
"filename": "IMG_20150219_174559.jpg",
"hash": "hPME6i9avCf/LFaznYr+sHtwQEX7mXYHSu+vgtygpM8=",
"mimetype": "image/jpeg",
"size": 1481798
}
},
"id": "c2ce1975-0e52-4b2f-a5db-80166aeca688",
"last_modified": 1447834938251,
"theme": "orange",
"type": "wallpaper"
},
"permissions": {
"write": ["basicauth:6de355038fd943a2dc91405063b91018bb5dd97a08d1beb95713d23c2909748f"]
}
}
Usage
=====

Expand Down Expand Up @@ -418,10 +362,6 @@ bucket and collection::

$ python3 scripts/upload.py --server=$SERVER --bucket=$BUCKET --collection=$COLLECTION --auth "token:mysecret" README.rst pictures/*

If the ``--gzip`` option is passed, the files are gzipped before upload.
Since the ``attachment`` attribute contains metadata of the compressed file
the original file metadata are stored in a ``original`` attribute.

See ``python3 scripts/upload.py --help`` for more details about options.


Expand Down
21 changes: 3 additions & 18 deletions scripts/download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import gzip
import hashlib
import os

Expand All @@ -19,14 +18,8 @@ def download_files(client, records, folder, chunk_size=1024):
continue

attachment = record['attachment']
# Check if file was Gzipped during upload (see `upload.py`)
is_gzip = 'original' in attachment
if is_gzip:
filename = attachment['original']['filename']
remote_hash = attachment['original']['hash']
else:
filename = attachment['filename']
remote_hash = attachment['hash']
filename = attachment['filename']
remote_hash = attachment['hash']

destination = os.path.join(folder, filename)

Expand All @@ -45,15 +38,7 @@ def download_files(client, records, folder, chunk_size=1024):
for chunk in resp.iter_content(chunk_size=chunk_size):
if chunk:
f.write(chunk)

# Decompress the file if necessary.
if is_gzip:
with open(destination, 'wb') as f:
data = open(tmp_file, 'rb').read()
f.write(gzip.decompress(data))
else:
os.rename(tmp_file, destination)

os.rename(tmp_file, destination)
print('Downloaded "%s"' % filename)


Expand Down
7 changes: 1 addition & 6 deletions scripts/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,7 @@ def files_to_upload(records, files, force=False):
records_by_id.pop(record['id'], None)
local_hash = sha256(open(filepath, 'rb').read())

# If file was uploaded gzipped, compare with hash of
# uncompressed file.
remote_hash = record.get('original', {}).get('hash')
if not remote_hash:
remote_hash = record['attachment']['hash']

remote_hash = record['attachment']['hash']
# If hash has changed, upload !
if local_hash != remote_hash or force:
print("File '%s' has changed." % filename)
Expand Down
6 changes: 3 additions & 3 deletions src/kinto_attachment/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def includeme(config):
if setting_name.startswith("attachment.resources."):
# Resource specific config
parts = setting_name.replace("attachment.resources.", "").split(".")
# attachment.resources.{bid}.gzipped
# attachment.resources.{bid}.{cid}.gzipped
# attachment.resources.{bid}.randomize
# attachment.resources.{bid}.{cid}.randomize
if len(parts) == 3:
bucket_id, collection_id, name = parts
resource_id = "/buckets/{}/collections/{}".format(bucket_id, collection_id)
Expand All @@ -33,7 +33,7 @@ def includeme(config):
message = "Configuration rule malformed: `{}`".format(setting_name)
raise ConfigurationError(message)

if name in ("gzipped", "randomize", "keep_old_files"):
if name in ("randomize", "keep_old_files"):
config.registry.attachment_resources[resource_id][name] = asbool(setting_value)
else:
message = "`{}` is not a supported setting name. Read `{}`".format(
Expand Down
33 changes: 0 additions & 33 deletions src/kinto_attachment/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import cgi
import gzip
import hashlib
import json
import os
from io import BytesIO

from kinto.authorization import RouteFactory
from kinto.core import utils as core_utils
Expand Down Expand Up @@ -144,7 +142,6 @@ def delete_attachment(request, link_field=None, uri=None, keep_old_files=False):


def save_file(request, content, folder=None, keep_link=True, replace=False):
gzipped = setting_value(request, "gzipped", default=False)
randomize = setting_value(request, "randomize", default=True)

overriden_mimetypes = {**DEFAULT_MIMETYPES}
Expand All @@ -167,41 +164,13 @@ def save_file(request, content, folder=None, keep_link=True, replace=False):
_, extension = os.path.splitext(filename)
mimetype = overriden_mimetypes.get(extension, content.type)

original = None
save_options = {
"folder": folder,
"randomize": randomize,
"replace": replace,
"headers": {"Content-Type": mimetype},
}

if gzipped:
original = {
"filename": filename,
"hash": filehash,
"mimetype": mimetype,
"size": size,
}
mimetype = "application/x-gzip"
filename += ".gz"
content.filename = filename
save_options["extensions"] = ["gz"]
save_options["headers"]["Content-Type"] = mimetype

# in-memory gzipping
out = BytesIO()
with gzip.GzipFile(fileobj=out, mode="w") as f:
f.write(filecontent)

filecontent = out.getvalue()
out.seek(0)
content.file = out

# We give the hash and size of the gzip content in the attachment
# metadata.
filehash = sha256(filecontent)
size = len(filecontent)

try:
location = request.attachment.save(content, **save_options)
except FileNotAllowed:
Expand All @@ -217,8 +186,6 @@ def save_file(request, content, folder=None, keep_link=True, replace=False):
"mimetype": mimetype,
"size": size,
}
if original is not None:
attachment["original"] = original

if keep_link:
# Store link between record and attachment (for later deletion).
Expand Down
16 changes: 0 additions & 16 deletions tests/config/local_gzipped.ini

This file was deleted.

4 changes: 2 additions & 2 deletions tests/config/s3_per_resource.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ kinto.attachment.aws.access_key = aws
kinto.attachment.aws.secret_key = aws
kinto.attachment.aws.bucket_name = myfiles

kinto.attachment.resources.fennec.gzipped = true
kinto.attachment.resources.fennec.experiments.gzipped = false
kinto.attachment.resources.fennec.randomize = true
kinto.attachment.resources.fennec.experiments.randomize = false
9 changes: 5 additions & 4 deletions tests/test_plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

import pytest
from kinto import main as kinto_main
from kinto_attachment import __version__, includeme
from pyramid import testing
from pyramid.exceptions import ConfigurationError
from pyramid_storage.gcloud import GoogleCloudStorage
from pyramid_storage.interfaces import IFileStorage
from pyramid_storage.s3 import S3FileStorage

from kinto_attachment import __version__, includeme

from . import BaseWebTestLocal


Expand Down Expand Up @@ -37,7 +38,7 @@ def test_includeme_understand_authorized_resources_settings(self):
config = self.includeme(
settings={
"attachment.base_path": "/tmp",
"attachment.resources.fennec.gzipped": "true",
"attachment.resources.fennec.keep_old_files": "true",
"attachment.resources.fingerprinting.fonts.randomize": "true",
}
)
Expand All @@ -47,9 +48,9 @@ def test_includeme_understand_authorized_resources_settings(self):

def test_includeme_raises_error_for_malformed_resource_settings(self):
with pytest.raises(ConfigurationError) as excinfo:
self.includeme(settings={"attachment.resources.fen.nec.fonts.gzipped": "true"})
self.includeme(settings={"attachment.resources.fen.nec.fonts.keep_old_files": "true"})
assert str(excinfo.value) == (
"Configuration rule malformed: `attachment.resources.fen.nec.fonts.gzipped`"
"Configuration rule malformed: `attachment.resources.fen.nec.fonts.keep_old_files`"
)

def test_includeme_raises_error_if_wrong_resource_settings_is_defined(self):
Expand Down
31 changes: 0 additions & 31 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
import cgi
import unittest
from io import BytesIO

from kinto_attachment.utils import save_file


class _Registry(object):
settings = {"attachment.folder": ""}
attachment_resources = {}
Expand All @@ -31,27 +24,3 @@ class _Request(object):

def route_path(self, *args, **kw):
return "fullpath"


class TestUtils(unittest.TestCase):
def test_save_file_gzip(self):
my_font = cgi.FieldStorage()
my_font.filename = "font.ttf"
my_font.file = BytesIO(b"content")
my_font.type = "application/x-font"

request = _Request()
request.registry.settings["attachment.gzipped"] = True
res = save_file(request, my_font)
self.assertTrue("original" in res)

def test_save_file_not_gzip(self):
my_font = cgi.FieldStorage()
my_font.filename = "font.ttf"
my_font.file = BytesIO(b"content")
my_font.type = "application/x-font"

request = _Request()
request.registry.settings["attachment.gzipped"] = False
res = save_file(request, my_font)
self.assertFalse("original" in res)
29 changes: 7 additions & 22 deletions tests/test_views_attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from unittest import mock
from urllib.parse import urlparse

import requests
from kinto.core.errors import ERRORS

from kinto_attachment.utils import sha256

from . import BaseWebTestLocal, BaseWebTestS3, get_user_headers
Expand Down Expand Up @@ -358,38 +358,23 @@ def test_upload_replace_accepted_if_write_allowed(self):
self.upload(status=200)


class ZippedAttachementViewTest(BaseWebTestLocal, unittest.TestCase):
config = "config/local_gzipped.ini"

def test_file_get_zipped_if_configured(self):
r = self.upload()
self.assertEqual(r.json["mimetype"], "application/x-gzip")
self.assertEqual(r.json["filename"], "image.jpg.gz")


class PerResourceConfigAttachementViewTest(BaseWebTestS3, unittest.TestCase):
config = "config/s3_per_resource.ini"

def test_file_get_zipped_in_fennec_bucket(self):
def test_file_get_randomize_in_fennec_bucket(self):
r = self.upload()

self.assertEqual(r.json["original"]["mimetype"], "image/jpeg")
self.assertEqual(r.json["mimetype"], "application/x-gzip")
self.assertEqual(r.json["filename"], "image.jpg.gz")

relative_url = r.json["location"].replace(self.base_url, "")
resp = requests.get("http://localhost:6000/myfiles/{}".format(relative_url))
self.assertEqual(resp.headers["Content-Type"], "application/x-gzip")
self.assertNotIn("Content-Encoding", resp.headers)
self.assertEqual(r.json["filename"], "image.jpg")
self.assertNotIn(r.json["filename"], r.json["location"])

def test_file_do_not_get_zipped_in_fennec_experiments_collection(self):
def test_file_do_not_get_randomize_in_fennec_experiments_collection(self):
self.create_collection("fennec", "experiments")
record_uri = self.get_record_uri("fennec", "experiments", str(uuid.uuid4()))
self.endpoint_uri = record_uri + "/attachment"
r = self.upload()

self.assertNotIn("original", r.json["mimetype"])
self.assertEqual(r.json["mimetype"], "image/jpeg")
self.assertEqual(r.json["filename"], "image.jpg")
self.assertIn(r.json["filename"], r.json["location"])


class OverridenMimetypesTest(BaseWebTestS3, unittest.TestCase):
Expand Down

0 comments on commit df13ff9

Please sign in to comment.