Skip to content

Commit

Permalink
Clean up NMR utils and add tests for Bruker reader
Browse files Browse the repository at this point in the history
  • Loading branch information
ml-evs committed Aug 24, 2023
1 parent dbf988f commit 8bc011e
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 11 deletions.
34 changes: 23 additions & 11 deletions pydatalab/pydatalab/apps/nmr/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import itertools
import os
import re
from pathlib import Path

import matplotlib.pyplot as plt
import nmrglue as ng
Expand All @@ -13,15 +14,27 @@
######################################################################################


def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None):
def read_bruker_1d(
data: Path | pd.DataFrame,
process_number: int = 1,
verbose: bool = False,
sample_mass_mg: float | None = None,
) -> tuple[pd.DataFrame | None, dict, str | None, tuple[int, ...]]:
"""Read a 1D Bruker NMR spectrum and return it as a pandas DataFrame.
arguments:
Parameters:
data: The directory of the full bruker data file, or a pandas DataFrame which
will be returned without further processing.
process_number: The process number of the processed data you want to plot [default: 1].
verbose: Whether to print information such as the spectrum title to stdout.
sample_mass_mg: The (optional) sample mass. If provided, the resulting DataFrame will have an "intensity_per_scan_per_gram" column.
Returns:
df: A pandas DataFrame containing the spectrum data, or None if the reading failed.
a_dic: A dictionary containing the acquisition parameters.
topspin_title: The title of the spectrum, as stored in the topspin "title" file.
shape: The shape of the spectrum data array.
data: The directory of the full bruker data file. You may also supply a df as this argument. In this case, the df is returned as is.
process_number: The process number of the processed data you want to plot [default 1]
verbose: Whether to print information such as the spectrum title to stdout (default True)
sample_mass_mg: The (optional) sample mass. If provided, the resulting DataFrame will have a "intensity_per_scan_per_gram" column.
"""

# if df is provided, just return it as-is. This functionality is provided to make functions calling read_bruker_1d flexible by default.
Expand All @@ -32,12 +45,12 @@ def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None):
print("data frame provided to read_bruker_1d(). Returning it as is.")
return data
else:
data_dir = data
data_dir = Path(data)

processed_data_dir = os.path.join(data_dir, "pdata", str(process_number))
processed_data_dir = data_dir / "pdata" / str(process_number)

a_dic, a_data = ng.fileio.bruker.read(data_dir) # aquisition_data
p_dic, p_data = ng.fileio.bruker.read_pdata(processed_data_dir) # processing data
a_dic, a_data = ng.fileio.bruker.read(str(data_dir)) # aquisition_data
p_dic, p_data = ng.fileio.bruker.read_pdata(str(processed_data_dir)) # processing data

try:
with open(os.path.join(processed_data_dir, "title"), "r") as f:
Expand All @@ -46,7 +59,6 @@ def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None):
topspin_title = None

if len(p_data.shape) > 1:
print("data is more than one dimensional - read failed")
return None, a_dic, topspin_title, p_data.shape

nscans = a_dic["acqus"]["NS"]
Expand Down
54 changes: 54 additions & 0 deletions pydatalab/tests/apps/test_nmr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import zipfile
from pathlib import Path

import pytest

from pydatalab.apps.nmr.utils import read_bruker_1d


def _extract_example(filename, dir):
    """Extract a zipped example dataset and return the path to its contents.

    Parameters:
        filename: Location of the zip archive, as a ``str`` or path-like object.
        dir: Directory into which the archive is extracted. The name shadows
            the builtin but is kept so existing callers are unaffected.

    Returns:
        ``Path(dir) / <archive stem>``, e.g. ``1.zip`` -> ``<dir>/1``. The
        archive is expected to contain a top-level folder with that name;
        its existence is not checked here.
    """
    # Normalise once so that plain string paths work too (``str`` has no ``.stem``).
    archive = Path(filename)
    with zipfile.ZipFile(archive, "r") as zip_ref:
        zip_ref.extractall(dir)
    return Path(dir) / archive.stem


@pytest.fixture(scope="function")
def nmr_1d_solution_example(tmpdir):
    """Unpack the ``1.zip`` NMR example archive into ``tmpdir``.

    Returns whatever ``_extract_example`` returns for that archive.
    """
    # Three directory levels above this test file sits ``example_data``.
    base_dir = Path(__file__).parent.parent.parent
    archive = base_dir.joinpath("example_data", "NMR", "1.zip")
    return _extract_example(archive, tmpdir)


@pytest.fixture(scope="function")
def nmr_1d_solid_example(tmpdir):
    """Unpack the ``71.zip`` NMR example archive into ``tmpdir``.

    Returns whatever ``_extract_example`` returns for that archive.
    """
    # Three directory levels above this test file sits ``example_data``.
    base_dir = Path(__file__).parent.parent.parent
    archive = base_dir.joinpath("example_data", "NMR", "71.zip")
    return _extract_example(archive, tmpdir)


@pytest.fixture(scope="function")
def nmr_2d_matpass_example(tmpdir):
    """Unpack the ``72.zip`` NMR example archive into ``tmpdir``.

    Returns whatever ``_extract_example`` returns for that archive.
    """
    # Three directory levels above this test file sits ``example_data``.
    base_dir = Path(__file__).parent.parent.parent
    archive = base_dir.joinpath("example_data", "NMR", "72.zip")
    return _extract_example(archive, tmpdir)


def test_bruker_reader_solution(nmr_1d_solution_example):
    """``read_bruker_1d`` yields a spectrum, metadata and a 4096-point shape."""
    result = read_bruker_1d(nmr_1d_solution_example)
    spectrum, acquisition_params, title, data_shape = result
    assert spectrum is not None
    assert acquisition_params
    assert title
    assert data_shape == (4096,)


def test_bruker_reader_solid(nmr_1d_solid_example):
    """``read_bruker_1d`` yields a spectrum, metadata and a 9984-point shape."""
    result = read_bruker_1d(nmr_1d_solid_example)
    spectrum, acquisition_params, title, data_shape = result
    assert spectrum is not None
    assert acquisition_params
    assert title
    assert data_shape == (9984,)


def test_bruker_reader_2D(nmr_2d_matpass_example):
    """For the 2D example, ``read_bruker_1d`` returns no DataFrame but still
    reports the acquisition parameters, the title and the ``(8, 4096)`` shape.
    """
    result = read_bruker_1d(nmr_2d_matpass_example)
    spectrum, acquisition_params, title, data_shape = result
    assert spectrum is None
    assert acquisition_params
    assert title
    assert data_shape == (8, 4096)

0 comments on commit 8bc011e

Please sign in to comment.