Skip to content

Commit

Permalink
Small optimisations for referenceFS (#1393)
Browse files (browse the repository at this point in the history)
  • Loading branch information
martindurant authored Oct 19, 2023
1 parent c20c31a commit 6aa8a9a
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions fsspec/implementations/reference.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -117,8 +117,9 @@ def __init__(
self._items = {}
self.dirs = None
self.fs = fsspec.filesystem("file") if fs is None else fs
- with self.fs.open("/".join([self.root, ".zmetadata"]), "rb") as f:
- self._items[".zmetadata"] = f.read()
+ self._items[".zmetadata"] = self.fs.cat_file(
+ "/".join([self.root, ".zmetadata"])
+ )
met = json.loads(self._items[".zmetadata"])
self.record_size = met["record_size"]
self.zmetadata = met["metadata"]
Expand All @@ -131,10 +132,8 @@ def __init__(
def open_refs(field, record):
"""cached parquet file loader"""
path = self.url.format(field=field, record=record)
- with self.fs.open(path) as f:
- # TODO: since all we do is iterate, is arrow without pandas
- # better here?
- df = self.pd.read_parquet(f, engine="fastparquet")
+ data = io.BytesIO(self.fs.cat_file(path))
+ df = self.pd.read_parquet(data, engine="fastparquet")
refs = {c: df[c].values for c in df.columns}
return refs

Expand Down

0 comments on commit 6aa8a9a

Please sign in to comment.