Skip to content

Commit

Permalink
Push changes
Browse files Browse the repository at this point in the history
  • Loading branch information
ybressler committed May 28, 2024
1 parent b07dd1d commit fd5e22e
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions src/process_data/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
Process a measurements file:
1. If compressed, decompress
- check first if already decompressed (no need to reprocess)
2. Calculate result
3. Store result in new file
2. Parse date from file name
3. Calculate result
4. Store result in new file
Code credits: https://github.com/ifnesi/1brc#submitting
"""
Expand All @@ -22,6 +23,7 @@ def do_thing(file_name: str):
with pgzip.open(file_name, 'rb') as f_in, open(file_name.replace(".gz", ""), 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)

# Can't open in a context; otherwise, all the data would need to be loaded into memory...
df = (
pl.scan_csv(new_file_name, separator=";", has_header=False, with_column_names=lambda cols: ["station_name", "measurement"])
.group_by("station_name")
Expand All @@ -34,7 +36,7 @@ def do_thing(file_name: str):
.collect(streaming=True)
)

df.write_csv(f'new_file_name'.replace("txt","") + " - results.csv", separator=",")
df.write_csv(new_file_name.replace(".txt","") + " - results.csv", separator=",")
return df


Expand All @@ -50,12 +52,4 @@ def do_thing(file_name: str):
)
args = parser.parse_args()

df = do_thing(args.file_name)

print("{", end="")
for row in df.iter_rows():
print(
f"{row[0]}={row[1]:.1f}/{row[2]:.1f}/{row[3]:.1f}",
end=", "
)
print("\b\b} ")
df = do_thing(args.file_name)

0 comments on commit fd5e22e

Please sign in to comment.