Increase memory of raw lambda
philerooski committed Sep 10, 2024
1 parent a68065b commit 58c6f2d
Showing 2 changed files with 32 additions and 31 deletions.
src/lambda_function/raw/app.py (31 additions, 30 deletions)
@@ -211,35 +211,37 @@ def yield_compressed_data(object_stream: io.BytesIO, path: str, part_threshold=N
         part_threshold = 8 * 1024 * 1024
     with zipfile.ZipFile(object_stream, "r") as zip_stream:
         with zip_stream.open(path, "r") as json_stream:
-            compressed_data = io.BytesIO()
-            # analogous to the part number of a multipart upload
-            chunk_number = 1
-            with gzip.GzipFile(
-                filename=os.path.basename(path),
-                fileobj=compressed_data,
-                compresslevel=6,
-                mode="wb",
-            ) as gzip_file:
-                # We can expect at least 10x compression, so reading/writing the
-                # JSON in 10*part_threshold chunks ensures we do not flush the
-                # gzip buffer too often, which can slow the write process significantly.
-                compression_factor = 10
-                for chunk in iter(
-                    lambda: json_stream.read(compression_factor * part_threshold), b""
-                ):
-                    gzip_file.write(chunk)
-                    # .flush() ensures that .tell() gives us an accurate byte count,
-                    gzip_file.flush()
-                    if compressed_data.tell() >= part_threshold:
-                        yield compressed_data_wrapper(
-                            compressed_data=compressed_data, chunk_number=chunk_number
-                        )
-                        compressed_data.seek(0)
-                        compressed_data.truncate(0)
-                        chunk_number = chunk_number + 1
-            yield compressed_data_wrapper(
-                compressed_data=compressed_data, chunk_number=chunk_number
-            )
+            with io.BytesIO() as compressed_data:
+                # analogous to the part number of a multipart upload
+                chunk_number = 1
+                with gzip.GzipFile(
+                    filename=os.path.basename(path),
+                    fileobj=compressed_data,
+                    compresslevel=6,
+                    mode="wb",
+                ) as gzip_file:
+                    # We can expect at least 10x compression, so reading/writing the
+                    # JSON in 10*part_threshold chunks ensures we do not flush the
+                    # gzip buffer too often, which can slow the write process significantly.
+                    compression_factor = 10
+                    for chunk in iter(
+                        lambda: json_stream.read(compression_factor * part_threshold),
+                        b"",
+                    ):
+                        gzip_file.write(chunk)
+                        # .flush() ensures that .tell() gives us an accurate byte count,
+                        gzip_file.flush()
+                        if compressed_data.tell() >= part_threshold:
+                            yield compressed_data_wrapper(
+                                compressed_data=compressed_data,
+                                chunk_number=chunk_number,
+                            )
+                            compressed_data.seek(0)
+                            compressed_data.truncate(0)
+                            chunk_number = chunk_number + 1
+                yield compressed_data_wrapper(
+                    compressed_data=compressed_data, chunk_number=chunk_number
+                )


 def compressed_data_wrapper(compressed_data: io.BytesIO, chunk_number: int):
@@ -334,4 +336,3 @@ def main(event: dict, s3_client: boto3.client, raw_bucket: str, raw_key_prefix:
         logger.info(
             f"Complete multipart upload response: {completed_upload_response}"
         )
-        return completed_upload_response
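
Each value yielded by yield_compressed_data corresponds to one part of an S3 multipart upload, which is why the generator waits until the gzip buffer reaches part_threshold (8 MiB when not supplied) before yielding: S3 requires every part except the last to be at least 5 MiB. The sketch below is not code from this repository. It assumes the yielded parts can be unpacked as (part_number, BytesIO) pairs, since the return shape of compressed_data_wrapper() is not visible in this diff, and the bucket and key arguments are placeholders; the boto3 calls themselves (create_multipart_upload, upload_part, complete_multipart_upload) are the standard S3 client methods.

import io
from typing import Iterable, Tuple

import boto3


def upload_compressed_parts(
    parts: Iterable[Tuple[int, io.BytesIO]], bucket: str, key: str
) -> dict:
    """Drive an S3 multipart upload from an iterator of (part_number, buffer) pairs.

    Minimal sketch only: the lambda in this repository wraps each part with
    compressed_data_wrapper(), whose return shape is not shown in this diff,
    so plain tuples are assumed here instead.
    """
    s3_client = boto3.client("s3")
    multipart_upload = s3_client.create_multipart_upload(Bucket=bucket, Key=key)
    completed_parts = []
    for part_number, part_data in parts:
        # Read the buffer immediately; the generator reuses it for the next part.
        part_data.seek(0)
        response = s3_client.upload_part(
            Bucket=bucket,
            Key=key,
            PartNumber=part_number,
            UploadId=multipart_upload["UploadId"],
            Body=part_data.read(),
        )
        completed_parts.append({"ETag": response["ETag"], "PartNumber": part_number})
    return s3_client.complete_multipart_upload(
        Bucket=bucket,
        Key=key,
        UploadId=multipart_upload["UploadId"],
        MultipartUpload={"Parts": completed_parts},
    )

Reusing a single BytesIO buffer for every part, as the diff above does, is safe as long as the consumer finishes reading each part before resuming the generator; a plain for loop guarantees this, because the generator only truncates the buffer after the yield returns.
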
src/lambda_function/raw/template.yaml (1 addition, 1 deletion)
@@ -52,7 +52,7 @@ Resources:
       Handler: app.lambda_handler
       Runtime: !Sub "python${LambdaPythonVersion}"
       Role: !Ref RoleArn
-      MemorySize: 1024
+      MemorySize: 1769
       EphemeralStorage:
         Size: 2048
       Timeout: 900
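
On the new value: 1,769 MB is the memory setting at which AWS Lambda allocates the equivalent of one full vCPU, which is presumably why it was chosen over a rounder number such as 2048.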
