Skip to content

Commit

Permalink
Merge pull request #154 from brittonsmith/ahfnew
Browse files Browse the repository at this point in the history
Ahfnew
  • Loading branch information
brittonsmith authored Nov 17, 2022
2 parents 3c4a5d9 + d8f66e6 commit d2266f7
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 39 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,13 @@ jobs:

- restore_cache:
name: "Restore test data cache."
key: test-data-abbey
key: test-data-abernathy

- download-test-data

- save_cache:
name: "Save test data cache."
key: test-data-abbey
key: test-data-abernathy
paths:
- ~/ytree_test

Expand Down
22 changes: 17 additions & 5 deletions doc/source/Loading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ use the freely available :ref:`sample-data`.
Amiga Halo Finder
-----------------

The `Amiga Halo Finder <http://popia.ft.uam.es/AHF/Download.html>`__ format
stores data in a series of files, with one each per snapshot. Parameters
are stored in ".parameters" and ".log" files, halo information in
".AHF_halos" files, and descendent/ancestor links are stored in ".AHF_mtree"
files. Make sure to keep all of these together. To load, provide the name
There are a couple data formats associated with the `Amiga Halo Finder
<http://popia.ft.uam.es/AHF/>`__. Both formats save a series of files
associated with each snapshot. Parameters are stored in ".parameters"
and ".log" files and halo properties in ".AHF_halos" files. In the
older format, descendent/ancestor links are stored in several
".AHF_mtree" files, one per snapshot. In the newer format, all halo
linking information is stored in a single file beginning with
"MergerTree\_" and ending with "-CRMratio2". Make sure to keep all
these files together in the same directory. To load, provide the name
of the first ".parameter" file.

.. code-block:: python
Expand All @@ -24,6 +28,14 @@ of the first ".parameter" file.
>>> a = ytree.load("ahf_halos/snap_N64L16_000.parameter",
... hubble_constant=0.7)
Alternatively, the "MergerTree\_" file can also be provided for the
newer format.

.. code-block:: python
>>> import ytree
>>> a = ytree.load("AHF_100_tiny/MergerTree_GIZMO-NewMDCLUSTER_0047.txt-CRMratio2")
.. note:: Four important notes about loading AHF data:

1. The dimensionless Hubble parameter is not provided in AHF
Expand Down
9 changes: 8 additions & 1 deletion tests/frontends/test_ahf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ytree.frontends.ahf import \
AHFArbor
AHFArbor, \
AHFNewArbor
from ytree.utilities.testing import \
ArborTest, \
TempDirTest
Expand All @@ -9,3 +10,9 @@ class AHFArborTest(TempDirTest, ArborTest):
test_filename = "ahf_halos/snap_N64L16_000.parameter"
num_data_files = 136
tree_skip = 100

class AHFNewArborTest(TempDirTest, ArborTest):
arbor_type = AHFNewArbor
test_filename = "AHF_100_tiny/GIZMO-NewMDCLUSTER_0047.snap_128.parameter"
num_data_files = 5
tree_skip = 100
36 changes: 35 additions & 1 deletion ytree/data_structures/arbor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,14 @@ def _plant_trees(self):
if self.is_planted:
return

# This is a somewhat hacky way of catching halos with links
# spanning more than one data set. That said, dict access
# is much faster than I thought and perhaps this whole
# routine needs to be refactored.
if self._has_uids:
all_dict = {}
missed_connections = []

# this can be called once with the list, but fields are
# not guaranteed to be returned in order.
if self._has_uids:
Expand Down Expand Up @@ -1182,15 +1190,29 @@ def _plant_trees(self):
root = i == 0 or descid == -1
# The data says a descendent exists, but it's not there.
# This shouldn't happen, but it does sometimes.
# This can also happen when a descendent is more than
# one snapshot removed.
mcollect = False
if not root and descid not in lastids:
root = True
my_descid = descid
descid = data[desc_id_f][it] = -1
if self._has_uids:
mcollect = True
tree_node = TreeNode(my_uid, arbor=self, root=root)
tree_node._fi = it
tree_node.data_file = data_file
batch[it] = tree_node

if self._has_uids:
all_dict[my_uid] = tree_node

if root:
trees.append(tree_node)
if mcollect:
tree_node._desc_uid = my_descid
missed_connections.append(tree_node)
else:
trees.append(tree_node)
else:
ancs[descid].append(tree_node)
uid += 1
Expand Down Expand Up @@ -1218,6 +1240,18 @@ def _plant_trees(self):
pbar.update(i+1)
pbar.finish()

if self._has_uids:
for node in missed_connections:
my_desc_uid = node._desc_uid
my_desc = all_dict[my_desc_uid]
delattr(node, "_desc_uid")

node._descendent = my_desc
node.root = my_desc.root
if my_desc._ancestors is None:
my_desc._ancestors = []
my_desc._ancestors.append(node)

self._trees = np.array(trees)
self._size = self._trees.size

Expand Down
3 changes: 2 additions & 1 deletion ytree/frontends/ahf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@
#-----------------------------------------------------------------------------

from ytree.frontends.ahf.arbor import \
AHFArbor
AHFArbor, \
AHFNewArbor
146 changes: 129 additions & 17 deletions ytree/frontends/ahf/arbor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,36 @@
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

from collections import defaultdict
import glob
import os
import re

from ytree.data_structures.arbor import \
CatalogArbor
from ytree.frontends.ahf.fields import \
AHFFieldInfo
AHFFieldInfo, \
AHFNewFieldInfo
from ytree.frontends.ahf.io import \
AHFDataFile
AHFDataFile, \
AHFNewDataFile
from ytree.frontends.ahf.misc import \
parse_AHF_file
from unyt.unit_registry import \
UnitRegistry
from ytree.utilities.io import \
f_text_block

class AHFArbor(CatalogArbor):
"""
Arbor for Amiga Halo Finder data.
"""

_suffix = ".parameter"
_data_suffix = ".AHF_halos"
_mtree_suffix = ".AHF_mtree"
_par_suffix = ".parameter"
_crm_prefix = "MergerTree_"
_crm_suffix = ".txt-CRMratio2"
_field_info_class = AHFFieldInfo
_data_file_class = AHFDataFile

Expand All @@ -46,17 +55,42 @@ def __init__(self, filename, log_filename=None,
self.omega_matter = omega_matter
self.omega_lambda = omega_lambda
self._box_size_user = box_size
self._file_pattern = re.compile(
rf"(^.+[^0-9a-zA-Z]+)(\d+).*{self._par_suffix}$")
super().__init__(filename)

def _is_crm_file(self, filename):
return os.path.basename(filename).startswith(self._crm_prefix) and \
filename.endswith(self._crm_suffix)

def _get_crm_filename(self, filename):
# Searching for <keyword>.something.<suffix>
res = re.search(rf"([^\.]+)\.[^\.]+{self._par_suffix}$", filename)
if not res:
return None

filekey = res.groups()[0]
ddir = os.path.dirname(filekey)
bname = os.path.basename(filekey)
return os.path.join(ddir, f"{self._crm_prefix}{bname}{self._crm_suffix}")

def _parse_parameter_file(self):
df = AHFDataFile(self.filename, self)

pars = {"simu.omega0": "omega_matter",
"simu.lambda0": "omega_lambda",
"simu.boxsize": "box_size"}
log_filename = self.log_filename \
if self.log_filename is not None else df.filekey + ".log"
if os.path.exists(log_filename):

if self.log_filename is None:
fns = glob.glob(df.filekey + "*.log")
if fns:
log_filename = fns[0]
else:
log_filename = None
else:
log_filename = self.log_filename

if log_filename is not None and os.path.exists(log_filename):
vals = parse_AHF_file(log_filename, pars, sep=":")
for attr in ["omega_matter",
"omega_lambda"]:
Expand All @@ -68,7 +102,7 @@ def _parse_parameter_file(self):
self.box_size = self.quan(self._box_size_user, "Mpc/h")

# fields from from the .AHF_halos files
f = open(f"{df.data_filekey}.AHF_halos")
f = open(f"{df.data_filekey}{self._data_suffix}")
line = f.readline()
f.close()

Expand All @@ -95,19 +129,17 @@ def _prefix(self):
# Match a patten of any characters, followed by some sort of
# separator (e.g., "." or "_"), then a number, and eventually
# the suffix.
reg = re.search(rf"(^.+[^0-9a-zA-Z]+)\d+.+{self._suffix}$", self.filename)
reg = self._file_pattern.search(self.filename)
self._fprefix = reg.groups()[0]
return self._fprefix

def _get_data_files(self):
"""
Get all *.parameter files and sort them in reverse order.
"""
my_files = glob.glob(f"{self._prefix}*{self._suffix}")
my_files = glob.glob(f"{self._prefix}*{self._par_suffix}")
# sort by catalog number
my_files.sort(
key=lambda x:
self._get_file_index(x))
my_files.sort(key=self._get_file_index)
self.data_files = \
[self._data_file_class(f, self) for f in my_files]

Expand All @@ -120,19 +152,99 @@ def _get_data_files(self):
self.data_files.reverse()

def _get_file_index(self, f):
reg = re.search(rf"{self._prefix}(\d+){self._suffix}$", f)
reg = self._file_pattern.search(f)
if not reg:
raise RuntimeError(
f"Could not locate index within file: {f}.")
return int(reg.groups()[0])
return int(reg.groups()[1])

@classmethod
def _is_valid(self, *args, **kwargs):
"""
File must end in .AHF_halos and have an associated
.parameter file.
File mush end in .parameter.
"""
fn = args[0]
if not fn.endswith(self._suffix):
if not fn.endswith(self._par_suffix):
return False

mtree_fn = self._get_crm_filename(self, fn)
if mtree_fn is not None and os.path.exists(mtree_fn):
return False

return True

class AHFNewArbor(AHFArbor):
"""
Arbor for a newer version of Amiga Halo Finder data.
"""

_has_uids = True
_field_info_class = AHFNewFieldInfo
_data_file_class = AHFNewDataFile

def _set_paths(self, filename):
super()._set_paths(filename)
if self._is_crm_file(filename):
basename = os.path.basename(filename)
filekey = basename[len(self._crm_prefix):-len(self._crm_suffix)]
pfns = glob.glob(os.path.join(self.directory, filekey) +
f"*{self._par_suffix}")
pfns.sort(key=self._get_file_index)
self.filename = pfns[-1]
self._crm_filename = self._get_crm_filename(self.filename)

def _plant_trees(self):
if self.is_planted:
return

self._compute_links()
super()._plant_trees()

def _compute_links(self):
"""
Read the CRMratio2 file and hand out a dictionary of
uid: desc_uid for each data file.
"""

links = defaultdict(dict)

f = open(self._crm_filename, mode="r")
for i in range(3):
f.readline()

for line, loc in f_text_block(f, pbar_string="Computing links"):
if line.startswith("END"):
break

online = line.split()
thing = online[0]
if len(online) == 2:
my_descid = int(thing)
continue

my_id = int(thing)
cid = int(thing[:-12])
links[cid][my_id] = my_descid
f.close()

for df in self.data_files:
df._links = links[df._catalog_index]

@classmethod
def _is_valid(self, *args, **kwargs):
"""
Filename must end in .parameter or match the CRM naming
convention.
"""
fn = args[0]
if self._is_crm_file(self, fn):
return True

if not fn.endswith(self._par_suffix):
return False

mtree_fn = self._get_crm_filename(self, fn)
if mtree_fn is None or not os.path.exists(mtree_fn):
return False

return True
Loading

0 comments on commit d2266f7

Please sign in to comment.