Merge branch 'main' into remove-datablock

ndevenish committed Aug 11, 2023
2 parents 004a99e + 5e079c7 commit 0d30762
Showing 41 changed files with 1,611 additions and 109 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/ci-conda-env.txt
@@ -2,7 +2,7 @@ conda-forge::boost
 conda-forge::boost-cpp
 conda-forge::bzip2
 conda-forge::c-compiler<1.5
-conda-forge::cctbx-base==2023.5
+conda-forge::cctbx-base==2023.7
 conda-forge::conda
 conda-forge::cxx-compiler<1.5
 conda-forge::python-dateutil
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
 project(dxtbx LANGUAGES C CXX)
 
 # Add the included modules
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
 
 # General cmake environment configuration
 include(SetDefaultBuildRelWithDebInfo) # Default builds to release with debug info
1 change: 1 addition & 0 deletions newsfragments/439.bugfix
@@ -0,0 +1 @@
``flumpy``: Fix a case where incorrect ``flex.vec2``/``flex.vec3`` arrays could be generated.
1 change: 1 addition & 0 deletions newsfragments/612.bugfix
@@ -0,0 +1 @@
NXmx files with multidimensional arrays (images, modules, or both) are now handled.
1 change: 1 addition & 0 deletions newsfragments/621.feature
@@ -0,0 +1 @@
Add a new Beam class, ``PolychromaticBeam``, for polychromatic/multi-wavelength/wide-bandpass experiments.
1 change: 1 addition & 0 deletions newsfragments/626.feature
@@ -0,0 +1 @@
Update Format handling to reflect the move of the Eiger detector from PETRA P14 to P13.
1 change: 1 addition & 0 deletions newsfragments/633.bugfix
@@ -0,0 +1 @@
Slicing of imageset objects is now consistently 0-based, including for the sliced data accessor. Previously, the data accessor had to be accessed with the original index offsets.
1 change: 1 addition & 0 deletions newsfragments/645.feature
@@ -0,0 +1 @@
Add partial support for the Rigaku Oxford Diffraction file format.
1 change: 1 addition & 0 deletions newsfragments/647.feature
@@ -0,0 +1 @@
The ``Beam`` model now has a ``probe`` value to keep track of the type of radiation.
1 change: 1 addition & 0 deletions newsfragments/649.feature
@@ -0,0 +1 @@
Add CBFMini support for the EIGER2 16M detector at CHESS beamline ID7B2, which has an inverted rotation axis.
1 change: 1 addition & 0 deletions newsfragments/650.misc
@@ -0,0 +1 @@
Format classes are now tested against invalid binary data using dials-data, so these tests also run when dials-regression is not present.
1 change: 1 addition & 0 deletions newsfragments/651.feature
@@ -0,0 +1 @@
Add support for Eiger 9M on ESRF ID23-2, which has an undeclared vertical goniometer.
1 change: 1 addition & 0 deletions newsfragments/652.bugfix
@@ -0,0 +1 @@
``dxtbx``: Add a fix for Eiger / NXmx data from i19-2 to correctly assign the image bit depth.
1 change: 1 addition & 0 deletions newsfragments/653.feature
@@ -0,0 +1 @@
``FormatROD``: include support for multi-axis goniometers and faster decompression.
1 change: 1 addition & 0 deletions newsfragments/655.misc
@@ -0,0 +1 @@
Update prebuilt CI CCTBX version.
126 changes: 126 additions & 0 deletions src/dxtbx/boost_python/compression.cc
@@ -164,3 +164,129 @@ unsigned int dxtbx::boost_python::cbf_decompress(const char *packed,

return values - original;
}

inline uint32_t read_uint32_from_bytearray(const char *buf) {
// `char` can be signed or unsigned depending on the platform.
// For bit shift operations, we need unsigned values.
// If `char` on the platform is signed, converting directly to "unsigned int" can
// produce huge numbers because modulo 2^n is taken by the integral conversion
// rules. Thus, we have to explicitly cast to `unsigned char` first.
// Then the automatic integral promotion converts them to `int`.
// Note that the unsigned to signed conversion is implementation-dependent
// and might not produce the intended result if two's complement is not used.
// Fortunately, DIALS targets only two's complement.
// https://github.com/cctbx/dxtbx/issues/11#issuecomment-1657809645
// Moreover, C++20 standardized this:
// https://stackoverflow.com/questions/54947427/going-from-signed-integers-to-unsigned-integers-and-vice-versa-in-c20

return ((unsigned char)buf[0]) | (((unsigned char)buf[1]) << 8)
| (((unsigned char)buf[2]) << 16) | (((unsigned char)buf[3]) << 24);
}

inline uint16_t read_uint16_from_bytearray(const char *buf) {
return ((unsigned char)buf[0]) | ((unsigned char)buf[1] << 8);
}

void dxtbx::boost_python::rod_TY6_decompress(int *const ret,
const char *const buf_data,
const char *const buf_offsets,
const int slow,
const int fast) {
const size_t BLOCKSIZE = 8; // The code below assumes this is at most 8
const signed int SHORT_OVERFLOW = 127; // after 127 is subtracted
const signed int LONG_OVERFLOW = 128;

const size_t nblock = (fast - 1) / (BLOCKSIZE * 2);
const size_t nrest = (fast - 1) % (BLOCKSIZE * 2);

for (size_t iy = 0; iy < slow; iy++) {
size_t ipos = read_uint32_from_bytearray(buf_offsets + iy * sizeof(uint32_t));
size_t opos = fast * iy;

// Values from -127 to +126 (inclusive) are stored with an offset of 127
// as 0 to 253. 254 and 255 mark short and long overflows.
// Other values ("overflows") are represented in two's complement.

int firstpx = (unsigned char)buf_data[ipos++] - 127;
if (firstpx == LONG_OVERFLOW) {
// See comments in read_uint32_from_bytearray() about
// the safety of the unsigned to signed conversion.
firstpx = (signed int)read_uint32_from_bytearray(buf_data + ipos);
ipos += 4;
} else if (firstpx == SHORT_OVERFLOW) {
firstpx = (signed short)read_uint16_from_bytearray(buf_data + ipos);
ipos += 2;
}
ret[opos++] = firstpx;

// For every two blocks
for (int k = 0; k < nblock; k++) {
const size_t bittypes = buf_data[ipos++];
const size_t nbits[2] = {bittypes & 15, (bittypes >> 4) & 15};

// One pixel is stored using `nbit` bits.
// Although `nbit` itself is stored using 4 bits,
// only values 1 (0001b) to 8 (1000b) are allowed.
// Negative values are encoded as follows. (Not 2's complement!)
// - When nbit = 1, the pixel value is 0 or 1
// - When nbit = 2, the pixel value is -1, 0, 1, 2
// - When nbit = 3, the pixel value is -3, -2, -1, 0, 1, 2, 3, 4
// - When nbit = 8, the pixel value is -127, -126, ...,
// 127 (== SHORT_OVERFLOW), 128 (== LONG_OVERFLOW)

// Load values
for (int i = 0; i < 2; i++) {
const size_t nbit = nbits[i];
assert(nbit >= 0 && nbit <= 8);

int zero_at = 0;
if (nbit > 1) {
zero_at = (1 << (nbit - 1)) - 1;
}

// Since nbit is at most 8, 8 * 8 (= BLOCKSIZE) = 64 bits are sufficient.
unsigned long long v = 0;
for (int j = 0; j < nbit; j++) {
// Implicit promotion is only up to 32 bits, not 64 bits so we have to be
// explicit.
v |= (long long)((unsigned char)buf_data[ipos++]) << (BLOCKSIZE * j);
}

const unsigned long long mask = (1 << nbit) - 1;
for (int j = 0; j < BLOCKSIZE; j++) {
ret[opos++] = ((v >> (nbit * j)) & mask) - zero_at;
}
}

// Apply differences. Load more values when overflown.
for (size_t i = opos - 2 * BLOCKSIZE; i < opos; i++) {
int offset = ret[i];

if (offset == LONG_OVERFLOW) {
offset = (signed int)read_uint32_from_bytearray(buf_data + ipos);
ipos += 4;
} else if (offset == SHORT_OVERFLOW) {
offset = (signed short)read_uint16_from_bytearray(buf_data + ipos);
ipos += 2;
}

ret[i] = offset + ret[i - 1];
}
}

for (int i = 0; i < nrest; i++) {
int offset = (unsigned char)buf_data[ipos++] - 127;

if (offset == LONG_OVERFLOW) {
offset = (signed int)read_uint32_from_bytearray(buf_data + ipos);
ipos += 4;
} else if (offset == SHORT_OVERFLOW) {
offset = (signed short)read_uint16_from_bytearray(buf_data + ipos);
ipos += 2;
}

ret[opos] = ret[opos - 1] + offset;
opos++;
}
}
}
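As a side note, here is a minimal Python sketch of the byte-level value encoding described in the comments above (values -127..126 stored with an offset of 127, with 254/255 marking short and long overflows). The function name and layout are illustrative only, not part of dxtbx.

import struct

SHORT_OVERFLOW_MARKER = 254  # value follows as little-endian int16
LONG_OVERFLOW_MARKER = 255   # value follows as little-endian int32

def read_ty6_value(buf: bytes, pos: int) -> tuple[int, int]:
    """Read one offset-encoded TY6 value, returning (value, new_pos)."""
    marker = buf[pos]
    pos += 1
    if marker == LONG_OVERFLOW_MARKER:
        return struct.unpack_from("<i", buf, pos)[0], pos + 4
    if marker == SHORT_OVERFLOW_MARKER:
        return struct.unpack_from("<h", buf, pos)[0], pos + 2
    # Plain values -127..126 are stored with an offset of 127, i.e. as bytes 0..253
    return marker - 127, pos + 1

assert read_ty6_value(bytes([132]), 0) == (5, 1)                          # small value
assert read_ty6_value(b"\xfe" + struct.pack("<h", 1000), 0) == (1000, 3)  # short overflow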
6 changes: 6 additions & 0 deletions src/dxtbx/boost_python/compression.h
@@ -6,6 +6,12 @@
namespace dxtbx { namespace boost_python {
unsigned int cbf_decompress(const char*, std::size_t, int*, const std::size_t);
std::vector<char> cbf_compress(const int*, const std::size_t&);
// Decompress Rigaku Oxford diffractometer TY6 compression
void rod_TY6_decompress(int* const,
const char* const,
const char* const,
const int,
const int);
}} // namespace dxtbx::boost_python

#endif
21 changes: 21 additions & 0 deletions src/dxtbx/boost_python/ext.cpp
@@ -193,6 +193,24 @@ namespace dxtbx { namespace boost_python {
return PyBytes_FromStringAndSize(&*packed.begin(), packed.size());
}

// Python entry point to decompress Rigaku Oxford Diffractometer TY6 compression
scitbx::af::flex_int uncompress_rod_TY6(const boost::python::object &data,
const boost::python::object &offsets,
const int &slow,
const int &fast) {
// Can't we extract const char* directly?
std::string str_data = boost::python::extract<std::string>(data);
std::string str_offsets = boost::python::extract<std::string>(offsets);

scitbx::af::flex_int z((scitbx::af::flex_grid<>(slow, fast)),
scitbx::af::init_functor_null<int>());

dxtbx::boost_python::rod_TY6_decompress(
z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast);

return z;
}

void init_module() {
using namespace boost::python;
def("read_uint8", read_uint8, (arg("file"), arg("count")));
@@ -206,6 +224,9 @@ namespace dxtbx { namespace boost_python {
def("is_big_endian", is_big_endian);
def("uncompress", &uncompress, (arg_("packed"), arg_("slow"), arg_("fast")));
def("compress", &compress);
def("uncompress_rod_TY6",
&uncompress_rod_TY6,
(arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast")));
}

BOOST_PYTHON_MODULE(dxtbx_ext) {
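A hedged usage sketch (not part of this diff) of the new Python entry point: once the dxtbx_ext extension is built, the binding registered above might be exercised like this. The tiny 2x1 "image" encodes one first-pixel value per row, following the TY6 rules in compression.cc; real data and offsets would come from a Rigaku Oxford Diffraction file.

import struct
from dxtbx_ext import uncompress_rod_TY6

offsets = struct.pack("<II", 0, 1)  # one little-endian uint32 start offset per row
data = bytes([127 + 5, 127 - 3])    # row 0 decodes to 5, row 1 to -3
image = uncompress_rod_TY6(data=data, offsets=offsets, slow=2, fast=1)
print(list(image))  # [5, -3]
print(image.all())  # (2, 1) flex grid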
7 changes: 7 additions & 0 deletions src/dxtbx/boost_python/flumpy.cc
@@ -520,6 +520,13 @@ py::object vec_from_numpy(py::array np_array) {

static_assert(VecType<int>::fixed_size == 2 || VecType<int>::fixed_size == 3,
"Only vec2/vec3 supported");

  // Only accept arrays with more than one dimension - we want
  // numpy.array([1,2,3]) to fail but numpy.array([[1,2,3]]) to work
if (np_array.ndim() == 1) {
throw std::invalid_argument("Array for conversion to vec must be multidimensional");
}

// Only accept arrays whose last dimension is the size of this object
if (np_array.shape(np_array.ndim() - 1) != VecType<int>::fixed_size) {
throw std::invalid_argument("Input array last dimension is not size "
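For illustration, a hedged sketch of the behaviour this check enforces, assuming the Python-facing wrapper dxtbx.flumpy.vec_from_numpy (pybind11 translates std::invalid_argument into ValueError):

import numpy as np
from dxtbx import flumpy

vecs = flumpy.vec_from_numpy(np.array([[1.0, 2.0, 3.0]]))  # 2D input: one vec3 element
try:
    flumpy.vec_from_numpy(np.array([1.0, 2.0, 3.0]))       # 1D input is now rejected
except ValueError as err:
    print(err)  # Array for conversion to vec must be multidimensional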
33 changes: 33 additions & 0 deletions src/dxtbx/dxtbx_model_ext.pyi
@@ -22,6 +22,8 @@ from scitbx.array_family import shared as flex_shared
# Attempt to use the stub typing for flex-inheritance
from scitbx.array_family.flex import FlexPlain

from dxtbx_model_ext import Probe # type: ignore

# TypeVar for the set of Experiment models that can be joint-accepted
# - profile, imageset and scalingmodel are handled as 'object'
TExperimentModel = TypeVar(
@@ -113,6 +115,37 @@ class Beam(BeamBase):
@staticmethod
def from_dict(data: Dict) -> Beam: ...
def to_dict(self) -> Dict: ...
@staticmethod
def get_probe_from_name(name: str) -> Probe: ...

class PolychromaticBeam(Beam):
@overload
def __init__(self, beam: PolychromaticBeam) -> None: ...
@overload
def __init__(self, direction: Vec3Float) -> None: ...
@overload
def __init__(
self,
direction: Vec3Float,
divergence: float,
sigma_divergence: float,
deg: bool = ...,
) -> None: ...
@overload
def __init__(
self,
direction: Vec3Float,
divergence: float,
sigma_divergence: float,
polarization_normal: Vec3Float,
polarization_fraction: float,
flux: float,
transmission: float,
deg: bool = ...,
) -> None: ...
@staticmethod
def from_dict(data: Dict) -> PolychromaticBeam: ...
def to_dict(self) -> Dict: ...

class CrystalBase:
@property
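A hedged construction sketch for the new class (argument order follows the overloads in the stub above; the availability of PolychromaticBeam under dxtbx.model is an assumption, not shown in this diff):

from dxtbx.model import PolychromaticBeam

# direction, divergence, sigma_divergence; remaining parameters take their defaults
beam = PolychromaticBeam((0.0, 0.0, 1.0), 0.0, 0.0)
print(beam.get_direction())
print(beam.to_dict())  # round-trips via PolychromaticBeam.from_dict per the stub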
15 changes: 15 additions & 0 deletions src/dxtbx/format/FormatCBFMini.py
@@ -8,6 +8,7 @@
from __future__ import annotations

import binascii
import datetime
import os
import pathlib
import sys
@@ -73,6 +74,20 @@ def __init__(self, image_file, **kwargs):
self._raw_data = None
super().__init__(image_file, **kwargs)

@staticmethod
def _get_timestamp_from_raw_header(
header: str | list[str],
) -> datetime.datetime | None:
"""Given a raw header, or lines from, attempt to extract the timestamp field"""
if isinstance(header, str):
header = header.splitlines()
timestamp = None
for record in header:
if len(record[1:].split()) <= 2 and record.count(":") == 2:
timestamp = datetime.datetime.fromisoformat(record[1:].strip())
break
return timestamp

def _start(self):
"""Open the image file, read the image header, copy it into a
dictionary for future reference."""
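A hedged illustration of the timestamp heuristic above: a header record with exactly two colons and at most two whitespace-separated fields after the leading character is parsed as an ISO timestamp. The header text below is made up for the example.

from dxtbx.format.FormatCBFMini import FormatCBFMini

header = "\n".join(
    [
        "# Detector: Dectris EIGER2 Si 16M, S/N E-32-0129",
        "# 2021-06-01T12:34:56.000",
        "# Pixel_size 75e-6 m x 75e-6 m",
    ]
)
print(FormatCBFMini._get_timestamp_from_raw_header(header))  # 2021-06-01 12:34:56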
30 changes: 30 additions & 0 deletions src/dxtbx/format/FormatCBFMiniEigerChessID7B2.py
@@ -0,0 +1,30 @@
from __future__ import annotations

import sys

from dxtbx.format.FormatCBFMiniEiger import FormatCBFMiniEiger


class FormatCBFMiniEigerChessID7B2(FormatCBFMiniEiger):
"""A class for reading mini CBF format Eiger16M images for S/N E-32-0123
installed at CHESS ID7B2, which has an inverted goniometer axis."""

@staticmethod
def understand(image_file):
"""Check to see if this looks like an Eiger mini CBF format image,
i.e. we can make sense of it."""

header = FormatCBFMiniEiger.get_cbf_header(image_file)
for record in header.split("\n"):
if "# Detector: Dectris EIGER2 Si 16M, S/N E-32-0123" in record:
return True

return False

def _goniometer(self):
return self._goniometer_factory.known_axis((-1, 0, 0))


if __name__ == "__main__":
for arg in sys.argv[1:]:
print(FormatCBFMiniEigerChessID7B2.understand(arg))
13 changes: 12 additions & 1 deletion src/dxtbx/format/FormatCBFMiniEigerPetraP14.py
@@ -3,6 +3,7 @@

from __future__ import annotations

import datetime
import sys

from dxtbx.format.FormatCBFMiniEiger import FormatCBFMiniEiger
@@ -19,11 +20,21 @@ def understand(image_file):

header = FormatCBFMiniEiger.get_cbf_header(image_file)

# Valid from 22nd May 2021
expected_serial = "E-32-0129"
if timestamp := FormatCBFMiniEiger._get_timestamp_from_raw_header(header):
# We have a timestamp. Let's see what detector we should expect

# Before 22nd May 2021
if timestamp < datetime.datetime(2021, 5, 22):
expected_serial = "E-32-0107"

# Find the line recording detector serial, and check
for record in header.split("\n"):
if (
"# detector" in record.lower()
and "eiger" in record.lower()
and "E-32-0107" in record
and expected_serial in record
):
return True

2 changes: 2 additions & 0 deletions src/dxtbx/format/FormatGatanDM4.py
@@ -21,6 +21,7 @@
)
from dxtbx.format.Format import Format
from dxtbx.format.FormatMultiImage import FormatMultiImage
from dxtbx.model.beam import Probe


def read_tag(f, byteorder):
@@ -358,6 +359,7 @@ def _beam(self):
wavelength=wavelength,
polarization=(0, 1, 0),
polarization_fraction=0.5,
probe=Probe.electron,
)

def _scan(self):
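A hedged sketch of the new probe value being threaded through the Beam model (newsfragment 647); the get_probe/set_probe accessor names are assumed from the Probe import added above and the .pyi changes, and are not shown in this diff:

from dxtbx.model.beam import BeamFactory, Probe

beam = BeamFactory.simple(1.0)  # 1 Angstrom beam; X-rays are the assumed default probe
print(beam.get_probe())         # assumed accessor; expected Probe.xray
beam.set_probe(Probe.electron)  # what the probe=Probe.electron argument above effectively records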
(Remaining changed files not shown.)
