process non-compressed file
ybressler committed May 28, 2024
1 parent 29c4e83 commit 75612d8
Showing 2 changed files with 37 additions and 1 deletion.
1 change: 0 additions & 1 deletion scripts/upload_index_to_s3.sh
@@ -19,7 +19,6 @@ echo "<ul>" >> "${index_file}"
 aws s3 ls "s3://${bucket_name}/" --recursive --human-readable --profile "${profile_name}" | sort -rk4 | \
   awk '{sub(/^ +/, "", $0); print}' | \
   while read -r line; do
-    file_date=$(echo "${line}" | awk '{print $5, $6}')
     file_name=$(echo "${line}" | awk '{print $5, $6, $7}')
     file_size=$(echo "${line}" | awk '{print $3, $4}')
     echo "<li><a href=\"https://${bucket_name}.s3.amazonaws.com/${file_name}\">${file_name}</a> - ${file_size}</li>" >> "${index_file}"
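For context, a sketch of the listing format the loop above assumes; the exact column layout of "aws s3 ls --human-readable" output and the example key are assumptions inferred from the awk field indices:

# A hypothetical output line, with awk's field numbering underneath:
#
#   2024-05-28 09:15:02   12.5 MiB measurements - results.csv
#   $1         $2         $3   $4  $5           $6 $7
#
# "$3, $4" recovers the size with its unit, and "$5, $6, $7" reassembles an
# object key containing up to two spaces.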
37 changes: 37 additions & 0 deletions src/process_data/non_compressed.py
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import argparse

import polars as pl


def process_non_compressed_file(file_name: str):
    """Compute min, mean, and max measurement per station in the given file."""

    # Scan lazily rather than opening the file eagerly; otherwise all the
    # data would have to be loaded into memory at once.
    df = (
        pl.scan_csv(
            file_name,
            separator=";",
            has_header=False,
            with_column_names=lambda cols: ["station_name", "measurement"],
        )
        # Aggregate per station; the query is still lazy at this point.
        .group_by("station_name")
        .agg(
            pl.min("measurement").alias("min_measurement"),
            pl.mean("measurement").alias("mean_measurement"),
            pl.max("measurement").alias("max_measurement"),
        )
        .sort("station_name")
        # Streaming execution processes the scan in batches instead of
        # materializing the entire file in memory at once.
        .collect(streaming=True)
    )

    df.write_csv(file_name.replace(".txt", "") + " - results.csv", separator=",")
    return df


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Analyze measurement file")
    parser.add_argument("-f", "--file_name", dest="file_name", type=str, help="File name")
    args = parser.parse_args()

    df = process_non_compressed_file(args.file_name)
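For reference, a hypothetical invocation; the input path measurements.txt and its sample contents are assumptions, though the ";"-separated station/measurement format matches the scan_csv call above:

# measurements.txt (hypothetical contents):
#   Hamburg;12.0
#   Bulawayo;8.9
python src/process_data/non_compressed.py -f measurements.txt

This writes the sorted per-station min/mean/max table to "measurements - results.csv" alongside the input file.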
