Skip to content

Commit

Permalink
Refactor NMR block into its own app module and restructure blocks into common and base
Browse files Browse the repository at this point in the history
  • Loading branch information
ml-evs committed Aug 24, 2023
1 parent c47a141 commit f41992f
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 155 deletions.
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# This import is required to prevent circular imports for application-specific blocks
from pydatalab.blocks.blocks import DataBlock # noqa
from pydatalab.blocks.base import DataBlock # noqa
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/chat/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import openai
import tiktoken

from pydatalab.blocks.blocks import DataBlock
from pydatalab.blocks.base import DataBlock
from pydatalab.logger import LOGGER
from pydatalab.models import ITEM_MODELS
from pydatalab.utils import CustomJSONEncoder
Expand Down
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/echem/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from navani import echem as ec

from pydatalab import bokeh_plots
from pydatalab.blocks.blocks import DataBlock
from pydatalab.blocks.base import DataBlock
from pydatalab.file_utils import get_file_info_by_id
from pydatalab.logger import LOGGER
from pydatalab.mongo import flask_mongo
Expand Down
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/eis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
from bokeh.models import HoverTool, LogColorMapper

from pydatalab.blocks.blocks import DataBlock
from pydatalab.blocks.base import DataBlock
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
from pydatalab.file_utils import get_file_info_by_id
from pydatalab.logger import LOGGER
Expand Down
3 changes: 3 additions & 0 deletions pydatalab/pydatalab/apps/nmr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .blocks import NMRBlock

__all__ = ("NMRBlock",)
107 changes: 107 additions & 0 deletions pydatalab/pydatalab/apps/nmr/blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import os
import zipfile

import bokeh.embed
import pandas as pd

from pydatalab.blocks.base import DataBlock
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
from pydatalab.file_utils import get_file_info_by_id
from pydatalab.logger import LOGGER

from .utils import read_bruker_1d


class NMRBlock(DataBlock):
    """Data block that reads a zipped Bruker NMR project and plots its 1D data.

    The attached ``.zip`` file is extracted next to its stored location, one of
    the entries of the project's ``pdata`` directory is parsed with
    ``read_bruker_1d`` and the results are stored in ``self.data``.
    """

    blocktype = "nmr"
    description = "Simple NMR Block"
    # A tuple, not a bare string: `ext in ".zip"` is a substring test and would
    # also accept "", "." or ".zi".
    accepted_file_extensions = (".zip",)
    defaults = {"process number": 1}
    _supports_collections = False

    @property
    def plot_functions(self):
        """Return the callables that generate this block's plot data."""
        return (self.generate_nmr_plot,)

    def read_bruker_nmr_data(self):
        """Extract the attached archive, parse it and populate ``self.data``.

        Logs and returns ``None`` without touching the parsed-data keys when no
        file is attached, the file extension is not accepted, the archive has no
        ``<name>/pdata`` directory (or it is empty), or ``read_bruker_1d`` raises.
        """
        if "file_id" not in self.data:
            LOGGER.warning("NMRBlock.read_bruker_nmr_data(): No file set in the DataBlock")
            return

        zip_file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)
        filename = zip_file_info["name"]

        name, ext = os.path.splitext(filename)
        if ext.lower() not in self.accepted_file_extensions:
            LOGGER.warning(
                "NMRBlock.read_bruker_nmr_data(): Unsupported file extension (must be .zip)"
            )
            return

        # unzip:
        directory_location = zip_file_info["location"] + ".extracted"
        LOGGER.debug(f"Directory location is: {directory_location}")
        with zipfile.ZipFile(zip_file_info["location"], "r") as zip_ref:
            zip_ref.extractall(directory_location)

        # The project directory is expected to carry the archive's base name.
        extracted_directory_name = os.path.join(directory_location, name)
        pdata_directory = os.path.join(extracted_directory_name, "pdata")
        try:
            # os.listdir() returns entries in arbitrary order; sort so that the
            # default selection below is deterministic.
            available_processes = sorted(os.listdir(pdata_directory))
        except OSError as error:
            LOGGER.warning(
                f"NMRBlock.read_bruker_nmr_data(): Unable to list processes of {name}. {error}"
            )
            return

        if not available_processes:
            LOGGER.warning(f"NMRBlock.read_bruker_nmr_data(): No processed data found in {name}")
            return

        # Fall back to the first available process if none (or a stale one) is selected.
        if self.data.get("selected_process") not in available_processes:
            self.data["selected_process"] = available_processes[0]

        try:
            df, a_dic, topspin_title, processed_data_shape = read_bruker_1d(
                extracted_directory_name,
                process_number=self.data["selected_process"],
                verbose=False,
            )
        except Exception as error:
            LOGGER.critical(f"Unable to parse {name} as Bruker project. {error}")
            return

        serialized_df = df.to_dict() if (df is not None) else None

        # all data sorted in a fairly raw way
        self.data["processed_data"] = serialized_df
        self.data["acquisition_parameters"] = a_dic["acqus"]
        self.data["processing_parameters"] = a_dic["procs"]
        self.data["pulse_program"] = a_dic["pprog"]

        # specific things that we might want to pull out for the UI:
        self.data["available_processes"] = available_processes
        self.data["nucleus"] = a_dic["acqus"]["NUC1"]
        self.data["carrier_frequency_MHz"] = a_dic["acqus"]["SFO1"]
        self.data["carrier_offset_Hz"] = a_dic["acqus"]["O1"]
        self.data["recycle_delay"] = a_dic["acqus"]["D"][1]
        self.data["nscans"] = a_dic["acqus"]["NS"]
        self.data["CNST31"] = a_dic["acqus"]["CNST"][31]
        self.data["processed_data_shape"] = processed_data_shape

        self.data["probe_name"] = a_dic["acqus"]["PROBHD"]
        self.data["pulse_program_name"] = a_dic["acqus"]["PULPROG"]
        self.data["topspin_title"] = topspin_title

    def generate_nmr_plot(self):
        """Re-read the data and store an embeddable Bokeh plot in ``self.data``.

        Sets ``self.data["bokeh_plot_data"]`` to ``None`` when there is no
        processed data to plot.
        """
        # currently calls every time plotting happens, but it should only happen
        # if the file was updated
        self.read_bruker_nmr_data()
        if "processed_data" not in self.data or not self.data["processed_data"]:
            self.data["bokeh_plot_data"] = None
            return

        df = pd.DataFrame(self.data["processed_data"])
        df["normalized intensity"] = df.intensity / df.intensity.max()

        bokeh_layout = selectable_axes_plot(
            df,
            x_options=["ppm", "hz"],
            y_options=[
                "intensity",
                "intensity_per_scan",
                "normalized intensity",
            ],
            plot_line=True,
            point_size=3,
        )
        bokeh_layout.children[0].x_range.flipped = True  # flip x axis, per NMR convention

        self.data["bokeh_plot_data"] = bokeh.embed.json_item(bokeh_layout, theme=mytheme)
File renamed without changes.
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/raman/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pybaselines import Baseline
from scipy.signal import medfilt

from pydatalab.blocks.blocks import DataBlock
from pydatalab.blocks.base import DataBlock
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
from pydatalab.file_utils import get_file_info_by_id

Expand Down
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/tga/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from scipy.signal import savgol_filter

from pydatalab.apps.tga.parsers import parse_mt_mass_spec_ascii
from pydatalab.blocks.blocks import DataBlock
from pydatalab.blocks.base import DataBlock
from pydatalab.bokeh_plots import grid_theme, selectable_axes_plot
from pydatalab.file_utils import get_file_info_by_id
from pydatalab.logger import LOGGER
Expand Down
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/apps/xrd/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
from scipy.signal import medfilt

from pydatalab.blocks.blocks import DataBlock
from pydatalab.blocks.base import DataBlock
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
from pydatalab.file_utils import get_file_info_by_id
from pydatalab.logger import LOGGER
Expand Down
9 changes: 6 additions & 3 deletions pydatalab/pydatalab/blocks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from typing import Dict, Sequence, Type

# These app imports will be replaced by dynamic plugins in a future version
from pydatalab.apps.chat.blocks import ChatBlock
from pydatalab.apps.echem import CycleBlock
from pydatalab.apps.eis import EISBlock
from pydatalab.apps.nmr import NMRBlock
from pydatalab.apps.raman import RamanBlock
from pydatalab.apps.tga import MassSpecBlock
from pydatalab.apps.xrd import XRDBlock
from pydatalab.blocks.blocks import (
CommentBlock,
from pydatalab.blocks.base import (
DataBlock,
)
from pydatalab.blocks.common import (
CommentBlock,
MediaBlock,
NMRBlock,
NotSupportedBlock,
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@
import base64
import io
import os
import random
import zipfile
from typing import Any, Callable, Dict, Optional, Sequence

import bokeh.embed
import pandas as pd
from bson import ObjectId
from PIL import Image

from pydatalab import nmr_utils
from pydatalab.bokeh_plots import mytheme, selectable_axes_plot
from pydatalab.file_utils import get_file_info_by_id
from pydatalab.logger import LOGGER

__all__ = ("generate_random_id", "DataBlock")
Expand Down Expand Up @@ -170,138 +160,3 @@ def update_from_web(self, data):
self.data.update(data)

return self


class NotSupportedBlock(DataBlock):
    """Block registered under the "notsupported" blocktype; only sets class attributes."""

    _supports_collections = True
    description = "Block not supported"
    blocktype = "notsupported"


class CommentBlock(DataBlock):
    """Block registered under the "comment" blocktype; only sets class attributes."""

    _supports_collections = True
    description = "Comment"
    blocktype = "comment"


class MediaBlock(DataBlock):
    """Block for an attached image or video file.

    TIFF images are re-encoded as base64 PNG strings and stored under
    ``self.data["b64_encoded_image"]``, keyed by file ID.
    """

    blocktype = "media"
    description = "Media"
    accepted_file_extensions = (".png", ".jpeg", ".jpg", ".tif", ".tiff", ".mp4", ".mov", ".webm")
    _supports_collections = False

    @property
    def plot_functions(self):
        """Return the callables that generate this block's derived data."""
        return (self.encode_tiff,)

    def encode_tiff(self):
        """Store a base64-encoded PNG copy of the attached file if it is a TIFF.

        Does nothing beyond ensuring the ``b64_encoded_image`` dict exists when
        the attached file is not a ``.tif``/``.tiff`` (matched case-insensitively).
        Logs and returns ``None`` when no file is attached.
        """
        if "file_id" not in self.data:
            LOGGER.warning("MediaBlock.encode_tiff(): No file set in the DataBlock")
            return
        if "b64_encoded_image" not in self.data:
            self.data["b64_encoded_image"] = {}

        file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)
        # Lower-case the name so that e.g. "scan.TIF" is converted as well.
        if not file_info["name"].lower().endswith((".tif", ".tiff")):
            return

        # Context managers make sure both the image file handle and the buffer
        # are released, even if the conversion fails.
        with Image.open(file_info["location"]) as im, io.BytesIO() as f:
            LOGGER.warning("Making base64 encoding of tif")
            im.save(f, format="PNG")
            self.data["b64_encoded_image"][self.data["file_id"]] = base64.b64encode(
                f.getvalue()
            ).decode()


class NMRBlock(DataBlock):
    """Data block that reads a zipped Bruker NMR project and plots its 1D data.

    The attached ``.zip`` file is extracted next to its stored location, one of
    the entries of the project's ``pdata`` directory is parsed with
    ``nmr_utils.read_bruker_1d`` and the results are stored in ``self.data``.
    """

    blocktype = "nmr"
    description = "Simple NMR Block"
    # A tuple, not a bare string: `ext in ".zip"` is a substring test and would
    # also accept "", "." or ".zi".
    accepted_file_extensions = (".zip",)
    defaults = {"process number": 1}
    _supports_collections = False

    @property
    def plot_functions(self):
        """Return the callables that generate this block's plot data."""
        return (self.generate_nmr_plot,)

    def read_bruker_nmr_data(self):
        """Extract the attached archive, parse it and populate ``self.data``.

        Logs and returns ``None`` without touching the parsed-data keys when no
        file is attached, the file extension is not accepted, the archive has no
        ``<name>/pdata`` directory (or it is empty), or the parser raises.
        """
        if "file_id" not in self.data:
            LOGGER.warning("NMRBlock.read_bruker_nmr_data(): No file set in the DataBlock")
            return

        zip_file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)
        filename = zip_file_info["name"]

        name, ext = os.path.splitext(filename)
        if ext.lower() not in self.accepted_file_extensions:
            LOGGER.warning(
                "NMRBlock.read_bruker_nmr_data(): Unsupported file extension (must be .zip)"
            )
            return

        # unzip:
        directory_location = zip_file_info["location"] + ".extracted"
        LOGGER.debug(f"Directory location is: {directory_location}")
        with zipfile.ZipFile(zip_file_info["location"], "r") as zip_ref:
            zip_ref.extractall(directory_location)

        # The project directory is expected to carry the archive's base name.
        extracted_directory_name = os.path.join(directory_location, name)
        pdata_directory = os.path.join(extracted_directory_name, "pdata")
        try:
            # os.listdir() returns entries in arbitrary order; sort so that the
            # default selection below is deterministic.
            available_processes = sorted(os.listdir(pdata_directory))
        except OSError as error:
            LOGGER.warning(
                f"NMRBlock.read_bruker_nmr_data(): Unable to list processes of {name}. {error}"
            )
            return

        if not available_processes:
            LOGGER.warning(f"NMRBlock.read_bruker_nmr_data(): No processed data found in {name}")
            return

        # Fall back to the first available process if none (or a stale one) is selected.
        if self.data.get("selected_process") not in available_processes:
            self.data["selected_process"] = available_processes[0]

        try:
            df, a_dic, topspin_title, processed_data_shape = nmr_utils.read_bruker_1d(
                extracted_directory_name,
                process_number=self.data["selected_process"],
                verbose=False,
            )
        except Exception as error:
            LOGGER.critical(f"Unable to parse {name} as Bruker project. {error}")
            return

        serialized_df = df.to_dict() if (df is not None) else None

        # all data sorted in a fairly raw way
        self.data["processed_data"] = serialized_df
        self.data["acquisition_parameters"] = a_dic["acqus"]
        self.data["processing_parameters"] = a_dic["procs"]
        self.data["pulse_program"] = a_dic["pprog"]

        # specific things that we might want to pull out for the UI:
        self.data["available_processes"] = available_processes
        self.data["nucleus"] = a_dic["acqus"]["NUC1"]
        self.data["carrier_frequency_MHz"] = a_dic["acqus"]["SFO1"]
        self.data["carrier_offset_Hz"] = a_dic["acqus"]["O1"]
        self.data["recycle_delay"] = a_dic["acqus"]["D"][1]
        self.data["nscans"] = a_dic["acqus"]["NS"]
        self.data["CNST31"] = a_dic["acqus"]["CNST"][31]
        self.data["processed_data_shape"] = processed_data_shape

        self.data["probe_name"] = a_dic["acqus"]["PROBHD"]
        self.data["pulse_program_name"] = a_dic["acqus"]["PULPROG"]
        self.data["topspin_title"] = topspin_title

    def generate_nmr_plot(self):
        """Re-read the data and store an embeddable Bokeh plot in ``self.data``.

        Sets ``self.data["bokeh_plot_data"]`` to ``None`` when there is no
        processed data to plot.
        """
        # currently calls every time plotting happens, but it should only happen
        # if the file was updated
        self.read_bruker_nmr_data()
        if "processed_data" not in self.data or not self.data["processed_data"]:
            self.data["bokeh_plot_data"] = None
            return

        df = pd.DataFrame(self.data["processed_data"])
        df["normalized intensity"] = df.intensity / df.intensity.max()

        bokeh_layout = selectable_axes_plot(
            df,
            x_options=["ppm", "hz"],
            y_options=[
                "intensity",
                "intensity_per_scan",
                "normalized intensity",
            ],
            plot_line=True,
            point_size=3,
        )
        bokeh_layout.children[0].x_range.flipped = True  # flip x axis, per NMR convention

        self.data["bokeh_plot_data"] = bokeh.embed.json_item(bokeh_layout, theme=mytheme)
Loading

0 comments on commit f41992f

Please sign in to comment.