From f41992f10b5e0ad4d13443a610fbc3268642aeed Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Thu, 24 Aug 2023 14:59:43 +0100 Subject: [PATCH] Refactor NMR block into its own app module and restructure blocks into common and base --- pydatalab/pydatalab/apps/__init__.py | 2 +- pydatalab/pydatalab/apps/chat/blocks.py | 2 +- pydatalab/pydatalab/apps/echem/blocks.py | 2 +- pydatalab/pydatalab/apps/eis/__init__.py | 2 +- pydatalab/pydatalab/apps/nmr/__init__.py | 3 + pydatalab/pydatalab/apps/nmr/blocks.py | 107 +++++++++++++ .../{nmr_utils.py => apps/nmr/utils.py} | 0 pydatalab/pydatalab/apps/raman/blocks.py | 2 +- pydatalab/pydatalab/apps/tga/blocks.py | 2 +- pydatalab/pydatalab/apps/xrd/blocks.py | 2 +- pydatalab/pydatalab/blocks/__init__.py | 9 +- .../pydatalab/blocks/{blocks.py => base.py} | 145 ------------------ pydatalab/pydatalab/blocks/common.py | 49 ++++++ 13 files changed, 172 insertions(+), 155 deletions(-) create mode 100644 pydatalab/pydatalab/apps/nmr/__init__.py create mode 100644 pydatalab/pydatalab/apps/nmr/blocks.py rename pydatalab/pydatalab/{nmr_utils.py => apps/nmr/utils.py} (100%) rename pydatalab/pydatalab/blocks/{blocks.py => base.py} (52%) create mode 100644 pydatalab/pydatalab/blocks/common.py diff --git a/pydatalab/pydatalab/apps/__init__.py b/pydatalab/pydatalab/apps/__init__.py index 9d2070151..456cdcedd 100644 --- a/pydatalab/pydatalab/apps/__init__.py +++ b/pydatalab/pydatalab/apps/__init__.py @@ -1,2 +1,2 @@ # This import is required to prevent circular imports for application-specific blocks -from pydatalab.blocks.blocks import DataBlock # noqa +from pydatalab.blocks.base import DataBlock # noqa diff --git a/pydatalab/pydatalab/apps/chat/blocks.py b/pydatalab/pydatalab/apps/chat/blocks.py index af4b89bc6..ce7c9bdc5 100644 --- a/pydatalab/pydatalab/apps/chat/blocks.py +++ b/pydatalab/pydatalab/apps/chat/blocks.py @@ -5,7 +5,7 @@ import openai import tiktoken -from pydatalab.blocks.blocks import DataBlock +from pydatalab.blocks.base import DataBlock from pydatalab.logger import LOGGER from pydatalab.models import ITEM_MODELS from pydatalab.utils import CustomJSONEncoder diff --git a/pydatalab/pydatalab/apps/echem/blocks.py b/pydatalab/pydatalab/apps/echem/blocks.py index 76ce47b9b..f1ffe1969 100644 --- a/pydatalab/pydatalab/apps/echem/blocks.py +++ b/pydatalab/pydatalab/apps/echem/blocks.py @@ -9,7 +9,7 @@ from navani import echem as ec from pydatalab import bokeh_plots -from pydatalab.blocks.blocks import DataBlock +from pydatalab.blocks.base import DataBlock from pydatalab.file_utils import get_file_info_by_id from pydatalab.logger import LOGGER from pydatalab.mongo import flask_mongo diff --git a/pydatalab/pydatalab/apps/eis/__init__.py b/pydatalab/pydatalab/apps/eis/__init__.py index 5679e0c52..a9b77d652 100644 --- a/pydatalab/pydatalab/apps/eis/__init__.py +++ b/pydatalab/pydatalab/apps/eis/__init__.py @@ -5,7 +5,7 @@ import pandas as pd from bokeh.models import HoverTool, LogColorMapper -from pydatalab.blocks.blocks import DataBlock +from pydatalab.blocks.base import DataBlock from pydatalab.bokeh_plots import mytheme, selectable_axes_plot from pydatalab.file_utils import get_file_info_by_id from pydatalab.logger import LOGGER diff --git a/pydatalab/pydatalab/apps/nmr/__init__.py b/pydatalab/pydatalab/apps/nmr/__init__.py new file mode 100644 index 000000000..7c4581bae --- /dev/null +++ b/pydatalab/pydatalab/apps/nmr/__init__.py @@ -0,0 +1,3 @@ +from .blocks import NMRBlock + +__all__ = ("NMRBlock",) diff --git a/pydatalab/pydatalab/apps/nmr/blocks.py b/pydatalab/pydatalab/apps/nmr/blocks.py new file mode 100644 index 000000000..022131b4d --- /dev/null +++ b/pydatalab/pydatalab/apps/nmr/blocks.py @@ -0,0 +1,107 @@ +import os +import zipfile + +import bokeh.embed +import pandas as pd + +from pydatalab.blocks.base import DataBlock +from pydatalab.bokeh_plots import mytheme, selectable_axes_plot +from pydatalab.file_utils import get_file_info_by_id +from pydatalab.logger import LOGGER + +from .utils import read_bruker_1d + + +class NMRBlock(DataBlock): + blocktype = "nmr" + description = "Simple NMR Block" + accepted_file_extensions = ".zip" + defaults = {"process number": 1} + _supports_collections = False + + @property + def plot_functions(self): + return (self.generate_nmr_plot,) + + def read_bruker_nmr_data(self): + if "file_id" not in self.data: + LOGGER.warning("NMRPlot.read_bruker_nmr_data(): No file set in the DataBlock") + return + + zip_file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) + filename = zip_file_info["name"] + + name, ext = os.path.splitext(filename) + if ext.lower() not in self.accepted_file_extensions: + LOGGER.warning( + "NMRBlock.read_bruker_nmr_data(): Unsupported file extension (must be .zip)" + ) + return + + # unzip: + directory_location = zip_file_info["location"] + ".extracted" + LOGGER.debug(f"Directory location is: {directory_location}") + with zipfile.ZipFile(zip_file_info["location"], "r") as zip_ref: + zip_ref.extractall(directory_location) + + extracted_directory_name = os.path.join(directory_location, name) + available_processes = os.listdir(os.path.join(extracted_directory_name, "pdata")) + + if self.data.get("selected_process") not in available_processes: + self.data["selected_process"] = available_processes[0] + + try: + df, a_dic, topspin_title, processed_data_shape = read_bruker_1d( + os.path.join(directory_location, name), + process_number=self.data["selected_process"], + verbose=False, + ) + except Exception as error: + LOGGER.critical(f"Unable to parse {name} as Bruker project. {error}") + return + + serialized_df = df.to_dict() if (df is not None) else None + + # all data sorted in a fairly raw way + self.data["processed_data"] = serialized_df + self.data["acquisition_parameters"] = a_dic["acqus"] + self.data["processing_parameters"] = a_dic["procs"] + self.data["pulse_program"] = a_dic["pprog"] + + # specific things that we might want to pull out for the UI: + self.data["available_processes"] = available_processes + self.data["nucleus"] = a_dic["acqus"]["NUC1"] + self.data["carrier_frequency_MHz"] = a_dic["acqus"]["SFO1"] + self.data["carrier_offset_Hz"] = a_dic["acqus"]["O1"] + self.data["recycle_delay"] = a_dic["acqus"]["D"][1] + self.data["nscans"] = a_dic["acqus"]["NS"] + self.data["CNST31"] = a_dic["acqus"]["CNST"][31] + self.data["processed_data_shape"] = processed_data_shape + + self.data["probe_name"] = a_dic["acqus"]["PROBHD"] + self.data["pulse_program_name"] = a_dic["acqus"]["PULPROG"] + self.data["topspin_title"] = topspin_title + + def generate_nmr_plot(self): + self.read_bruker_nmr_data() # currently calls every time plotting happens, but it should only happen if the file was updated + if "processed_data" not in self.data or not self.data["processed_data"]: + self.data["bokeh_plot_data"] = None + return + + df = pd.DataFrame(self.data["processed_data"]) + df["normalized intensity"] = df.intensity / df.intensity.max() + + bokeh_layout = selectable_axes_plot( + df, + x_options=["ppm", "hz"], + y_options=[ + "intensity", + "intensity_per_scan", + "normalized intensity", + ], + plot_line=True, + point_size=3, + ) + bokeh_layout.children[0].x_range.flipped = True # flip x axis, per NMR convention + + self.data["bokeh_plot_data"] = bokeh.embed.json_item(bokeh_layout, theme=mytheme) diff --git a/pydatalab/pydatalab/nmr_utils.py b/pydatalab/pydatalab/apps/nmr/utils.py similarity index 100% rename from pydatalab/pydatalab/nmr_utils.py rename to pydatalab/pydatalab/apps/nmr/utils.py diff --git a/pydatalab/pydatalab/apps/raman/blocks.py b/pydatalab/pydatalab/apps/raman/blocks.py index 91a6526bb..5aa28ace6 100644 --- a/pydatalab/pydatalab/apps/raman/blocks.py +++ b/pydatalab/pydatalab/apps/raman/blocks.py @@ -7,7 +7,7 @@ from pybaselines import Baseline from scipy.signal import medfilt -from pydatalab.blocks.blocks import DataBlock +from pydatalab.blocks.base import DataBlock from pydatalab.bokeh_plots import mytheme, selectable_axes_plot from pydatalab.file_utils import get_file_info_by_id diff --git a/pydatalab/pydatalab/apps/tga/blocks.py b/pydatalab/pydatalab/apps/tga/blocks.py index 74ab05036..c4fe1dbb9 100644 --- a/pydatalab/pydatalab/apps/tga/blocks.py +++ b/pydatalab/pydatalab/apps/tga/blocks.py @@ -7,7 +7,7 @@ from scipy.signal import savgol_filter from pydatalab.apps.tga.parsers import parse_mt_mass_spec_ascii -from pydatalab.blocks.blocks import DataBlock +from pydatalab.blocks.base import DataBlock from pydatalab.bokeh_plots import grid_theme, selectable_axes_plot from pydatalab.file_utils import get_file_info_by_id from pydatalab.logger import LOGGER diff --git a/pydatalab/pydatalab/apps/xrd/blocks.py b/pydatalab/pydatalab/apps/xrd/blocks.py index 26d188387..210fa2d72 100644 --- a/pydatalab/pydatalab/apps/xrd/blocks.py +++ b/pydatalab/pydatalab/apps/xrd/blocks.py @@ -6,7 +6,7 @@ import pandas as pd from scipy.signal import medfilt -from pydatalab.blocks.blocks import DataBlock +from pydatalab.blocks.base import DataBlock from pydatalab.bokeh_plots import mytheme, selectable_axes_plot from pydatalab.file_utils import get_file_info_by_id from pydatalab.logger import LOGGER diff --git a/pydatalab/pydatalab/blocks/__init__.py b/pydatalab/pydatalab/blocks/__init__.py index 15e2c5f16..a35bea310 100644 --- a/pydatalab/pydatalab/blocks/__init__.py +++ b/pydatalab/pydatalab/blocks/__init__.py @@ -1,16 +1,19 @@ from typing import Dict, Sequence, Type +# These app imports will be replaced by dynamic plugins in a future version from pydatalab.apps.chat.blocks import ChatBlock from pydatalab.apps.echem import CycleBlock from pydatalab.apps.eis import EISBlock +from pydatalab.apps.nmr import NMRBlock from pydatalab.apps.raman import RamanBlock from pydatalab.apps.tga import MassSpecBlock from pydatalab.apps.xrd import XRDBlock -from pydatalab.blocks.blocks import ( - CommentBlock, +from pydatalab.blocks.base import ( DataBlock, +) +from pydatalab.blocks.common import ( + CommentBlock, MediaBlock, - NMRBlock, NotSupportedBlock, ) diff --git a/pydatalab/pydatalab/blocks/blocks.py b/pydatalab/pydatalab/blocks/base.py similarity index 52% rename from pydatalab/pydatalab/blocks/blocks.py rename to pydatalab/pydatalab/blocks/base.py index 9b263c78c..813d07714 100644 --- a/pydatalab/pydatalab/blocks/blocks.py +++ b/pydatalab/pydatalab/blocks/base.py @@ -1,18 +1,8 @@ -import base64 -import io -import os import random -import zipfile from typing import Any, Callable, Dict, Optional, Sequence -import bokeh.embed -import pandas as pd from bson import ObjectId -from PIL import Image -from pydatalab import nmr_utils -from pydatalab.bokeh_plots import mytheme, selectable_axes_plot -from pydatalab.file_utils import get_file_info_by_id from pydatalab.logger import LOGGER __all__ = ("generate_random_id", "DataBlock") @@ -170,138 +160,3 @@ def update_from_web(self, data): self.data.update(data) return self - - -class NotSupportedBlock(DataBlock): - blocktype = "notsupported" - description = "Block not supported" - _supports_collections = True - - -class CommentBlock(DataBlock): - blocktype = "comment" - description = "Comment" - _supports_collections = True - - -class MediaBlock(DataBlock): - blocktype = "media" - description = "Media" - accepted_file_extensions = (".png", ".jpeg", ".jpg", ".tif", ".tiff", ".mp4", ".mov", ".webm") - _supports_collections = False - - @property - def plot_functions(self): - return (self.encode_tiff,) - - def encode_tiff(self): - if "file_id" not in self.data: - LOGGER.warning("ImageBlock.encode_tiff(): No file set in the DataBlock") - return - if "b64_encoded_image" not in self.data: - self.data["b64_encoded_image"] = {} - file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) - if file_info["name"].endswith(".tif") or file_info["name"].endswith(".tiff"): - im = Image.open(file_info["location"]) - LOGGER.warning("Making base64 encoding of tif") - with io.BytesIO() as f: - im.save(f, format="PNG") - f.seek(0) - self.data["b64_encoded_image"][self.data["file_id"]] = base64.b64encode( - f.getvalue() - ).decode() - - -class NMRBlock(DataBlock): - blocktype = "nmr" - description = "Simple NMR Block" - accepted_file_extensions = ".zip" - defaults = {"process number": 1} - _supports_collections = False - - @property - def plot_functions(self): - return (self.generate_nmr_plot,) - - def read_bruker_nmr_data(self): - if "file_id" not in self.data: - LOGGER.warning("NMRPlot.read_bruker_nmr_data(): No file set in the DataBlock") - return - - zip_file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) - filename = zip_file_info["name"] - - name, ext = os.path.splitext(filename) - if ext.lower() not in self.accepted_file_extensions: - LOGGER.warning( - "NMRBlock.read_bruker_nmr_data(): Unsupported file extension (must be .zip)" - ) - return - - # unzip: - directory_location = zip_file_info["location"] + ".extracted" - LOGGER.debug(f"Directory location is: {directory_location}") - with zipfile.ZipFile(zip_file_info["location"], "r") as zip_ref: - zip_ref.extractall(directory_location) - - extracted_directory_name = os.path.join(directory_location, name) - available_processes = os.listdir(os.path.join(extracted_directory_name, "pdata")) - - if self.data.get("selected_process") not in available_processes: - self.data["selected_process"] = available_processes[0] - - try: - df, a_dic, topspin_title, processed_data_shape = nmr_utils.read_bruker_1d( - os.path.join(directory_location, name), - process_number=self.data["selected_process"], - verbose=False, - ) - except Exception as error: - LOGGER.critical(f"Unable to parse {name} as Bruker project. {error}") - return - - serialized_df = df.to_dict() if (df is not None) else None - - # all data sorted in a fairly raw way - self.data["processed_data"] = serialized_df - self.data["acquisition_parameters"] = a_dic["acqus"] - self.data["processing_parameters"] = a_dic["procs"] - self.data["pulse_program"] = a_dic["pprog"] - - # specific things that we might want to pull out for the UI: - self.data["available_processes"] = available_processes - self.data["nucleus"] = a_dic["acqus"]["NUC1"] - self.data["carrier_frequency_MHz"] = a_dic["acqus"]["SFO1"] - self.data["carrier_offset_Hz"] = a_dic["acqus"]["O1"] - self.data["recycle_delay"] = a_dic["acqus"]["D"][1] - self.data["nscans"] = a_dic["acqus"]["NS"] - self.data["CNST31"] = a_dic["acqus"]["CNST"][31] - self.data["processed_data_shape"] = processed_data_shape - - self.data["probe_name"] = a_dic["acqus"]["PROBHD"] - self.data["pulse_program_name"] = a_dic["acqus"]["PULPROG"] - self.data["topspin_title"] = topspin_title - - def generate_nmr_plot(self): - self.read_bruker_nmr_data() # currently calls every time plotting happens, but it should only happen if the file was updated - if "processed_data" not in self.data or not self.data["processed_data"]: - self.data["bokeh_plot_data"] = None - return - - df = pd.DataFrame(self.data["processed_data"]) - df["normalized intensity"] = df.intensity / df.intensity.max() - - bokeh_layout = selectable_axes_plot( - df, - x_options=["ppm", "hz"], - y_options=[ - "intensity", - "intensity_per_scan", - "normalized intensity", - ], - plot_line=True, - point_size=3, - ) - bokeh_layout.children[0].x_range.flipped = True # flip x axis, per NMR convention - - self.data["bokeh_plot_data"] = bokeh.embed.json_item(bokeh_layout, theme=mytheme) diff --git a/pydatalab/pydatalab/blocks/common.py b/pydatalab/pydatalab/blocks/common.py new file mode 100644 index 000000000..0d5041b11 --- /dev/null +++ b/pydatalab/pydatalab/blocks/common.py @@ -0,0 +1,49 @@ +import base64 +import io + +from PIL import Image + +from pydatalab.file_utils import get_file_info_by_id +from pydatalab.logger import LOGGER + +from .base import DataBlock + + +class NotSupportedBlock(DataBlock): + blocktype = "notsupported" + description = "Block not supported" + _supports_collections = True + + +class CommentBlock(DataBlock): + blocktype = "comment" + description = "Comment" + _supports_collections = True + + +class MediaBlock(DataBlock): + blocktype = "media" + description = "Media" + accepted_file_extensions = (".png", ".jpeg", ".jpg", ".tif", ".tiff", ".mp4", ".mov", ".webm") + _supports_collections = False + + @property + def plot_functions(self): + return (self.encode_tiff,) + + def encode_tiff(self): + if "file_id" not in self.data: + LOGGER.warning("ImageBlock.encode_tiff(): No file set in the DataBlock") + return + if "b64_encoded_image" not in self.data: + self.data["b64_encoded_image"] = {} + file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) + if file_info["name"].endswith(".tif") or file_info["name"].endswith(".tiff"): + im = Image.open(file_info["location"]) + LOGGER.warning("Making base64 encoding of tif") + with io.BytesIO() as f: + im.save(f, format="PNG") + f.seek(0) + self.data["b64_encoded_image"][self.data["file_id"]] = base64.b64encode( + f.getvalue() + ).decode()