diff --git a/newsfragments/653.feature b/newsfragments/653.feature new file mode 100644 index 000000000..22c7d8c0e --- /dev/null +++ b/newsfragments/653.feature @@ -0,0 +1 @@ +``FormatROD``: include support for multi-axis goniometers and faster decompression. diff --git a/src/dxtbx/boost_python/compression.cc b/src/dxtbx/boost_python/compression.cc index ff5d91c5f..6957d1bd1 100644 --- a/src/dxtbx/boost_python/compression.cc +++ b/src/dxtbx/boost_python/compression.cc @@ -164,3 +164,129 @@ unsigned int dxtbx::boost_python::cbf_decompress(const char *packed, return values - original; } + +inline uint32_t read_uint32_from_bytearray(const char *buf) { + // `char` can be signed or unsigned depending on the platform. + // For bit shift operations, we need unsigned values. + // If `char` on the platform is signed, converting directly to "unsigned int" can + // produce huge numbers because modulo 2^n is taken by the integral conversion + // rules. Thus, we have to explicitly cast to `unsigned char` first. + // Then the automatic integral promotion converts them to `int`. + // Note that the unsigned to signed conversion is implementation-dependent + // and might not produce the intended result if two's complement is not used. + // Fortunately, DIALS targets only two's complement. + // https://github.com/cctbx/dxtbx/issues/11#issuecomment-1657809645 + // Moreover, C++20 standarized this: + // https://stackoverflow.com/questions/54947427/going-from-signed-integers-to-unsigned-integers-and-vice-versa-in-c20 + + return ((unsigned char)buf[0]) | (((unsigned char)buf[1]) << 8) + | (((unsigned char)buf[2]) << 16) | (((unsigned char)buf[3]) << 24); +} + +inline uint16_t read_uint16_from_bytearray(const char *buf) { + return ((unsigned char)buf[0]) | ((unsigned char)buf[1] << 8); +} + +void dxtbx::boost_python::rod_TY6_decompress(int *const ret, + const char *const buf_data, + const char *const buf_offsets, + const int slow, + const int fast) { + const size_t BLOCKSIZE = 8; // Codes below assume this is at most 8 + const signed int SHORT_OVERFLOW = 127; // after 127 is subtracted + const signed int LONG_OVERFLOW = 128; + + const size_t nblock = (fast - 1) / (BLOCKSIZE * 2); + const size_t nrest = (fast - 1) % (BLOCKSIZE * 2); + + for (size_t iy = 0; iy < slow; iy++) { + size_t ipos = read_uint32_from_bytearray(buf_offsets + iy * sizeof(uint32_t)); + size_t opos = fast * iy; + + // Values from -127 to +126 (inclusive) are stored with an offset of 127 + // as 0 to 253. 254 and 255 mark short and long overflows. + // Other values ("overflows") are represented in two's complement. + + int firstpx = (unsigned char)buf_data[ipos++] - 127; + if (firstpx == LONG_OVERFLOW) { + // See comments in read_uint32_from_bytearray() about + // the safety of the unsigned to signed conversion. + firstpx = (signed int)read_uint32_from_bytearray(buf_data + ipos); + ipos += 4; + } else if (firstpx == SHORT_OVERFLOW) { + firstpx = (signed short)read_uint16_from_bytearray(buf_data + ipos); + ipos += 2; + } + ret[opos++] = firstpx; + + // For every two blocks + for (int k = 0; k < nblock; k++) { + const size_t bittypes = buf_data[ipos++]; + const size_t nbits[2] = {bittypes & 15, (bittypes >> 4) & 15}; + + // One pixel is stored using `nbit` bits. + // Although `nbit` itself is stored using 4 bits, + // only values 1 (0001b) to 8 (1000b) are allowed. + // Negative values are encoded as follows. (Not 2's complement!) + // - When nbit = 1, the pixel value is 0 or 1 + // - When nbit = 2, the pixel value is -1, 0, 1, 2 + // - When nbit = 3, the pixel value is -3, -2, 1, 0, 1, 2, 3, 4 + // - When nbit - 8, the pixel value is -127, -126, ..., + // 127 (== // SHORT_OVERFLOW), 128 (== LONG_OVERFLOW) + + // Load values + for (int i = 0; i < 2; i++) { + const size_t nbit = nbits[i]; + assert(nbit >= 0 && nbit <= 8); + + int zero_at = 0; + if (nbit > 1) { + zero_at = (1 << (nbit - 1)) - 1; + } + + // Since nbit is at most 8, 8 * 8 (= BLOCKSIZE) = 64 bits are sufficient. + unsigned long long v = 0; + for (int j = 0; j < nbit; j++) { + // Implicit promotion is only up to 32 bits, not 64 bits so we have to be + // explicit. + v |= (long long)((unsigned char)buf_data[ipos++]) << (BLOCKSIZE * j); + } + + const unsigned long long mask = (1 << nbit) - 1; + for (int j = 0; j < BLOCKSIZE; j++) { + ret[opos++] = ((v >> (nbit * j)) & mask) - zero_at; + } + } + + // Apply differences. Load more values when overflown. + for (size_t i = opos - 2 * BLOCKSIZE; i < opos; i++) { + int offset = ret[i]; + + if (offset == LONG_OVERFLOW) { + offset = (signed int)read_uint32_from_bytearray(buf_data + ipos); + ipos += 4; + } else if (offset == SHORT_OVERFLOW) { + offset = (signed short)read_uint16_from_bytearray(buf_data + ipos); + ipos += 2; + } + + ret[i] = offset + ret[i - 1]; + } + } + + for (int i = 0; i < nrest; i++) { + int offset = (unsigned char)buf_data[ipos++] - 127; + + if (offset == LONG_OVERFLOW) { + offset = (signed int)read_uint32_from_bytearray(buf_data + ipos); + ipos += 4; + } else if (offset == SHORT_OVERFLOW) { + offset = (signed short)read_uint16_from_bytearray(buf_data + ipos); + ipos += 2; + } + + ret[opos] = ret[opos - 1] + offset; + opos++; + } + } +} diff --git a/src/dxtbx/boost_python/compression.h b/src/dxtbx/boost_python/compression.h index d046b3965..b50b5e73f 100644 --- a/src/dxtbx/boost_python/compression.h +++ b/src/dxtbx/boost_python/compression.h @@ -6,6 +6,12 @@ namespace dxtbx { namespace boost_python { unsigned int cbf_decompress(const char*, std::size_t, int*, const std::size_t); std::vector cbf_compress(const int*, const std::size_t&); + // Decompress Rigaku Oxford diffractometer TY6 compression + void rod_TY6_decompress(int* const, + const char* const, + const char* const, + const int, + const int); }} // namespace dxtbx::boost_python #endif diff --git a/src/dxtbx/boost_python/ext.cpp b/src/dxtbx/boost_python/ext.cpp index 5273eb69e..70c69c06b 100644 --- a/src/dxtbx/boost_python/ext.cpp +++ b/src/dxtbx/boost_python/ext.cpp @@ -193,6 +193,24 @@ namespace dxtbx { namespace boost_python { return PyBytes_FromStringAndSize(&*packed.begin(), packed.size()); } + // Python entry point to decompress Rigaku Oxford Diffractometer TY6 compression + scitbx::af::flex_int uncompress_rod_TY6(const boost::python::object &data, + const boost::python::object &offsets, + const int &slow, + const int &fast) { + // Cannot I extract const char* directly? + std::string str_data = boost::python::extract(data); + std::string str_offsets = boost::python::extract(offsets); + + scitbx::af::flex_int z((scitbx::af::flex_grid<>(slow, fast)), + scitbx::af::init_functor_null()); + + dxtbx::boost_python::rod_TY6_decompress( + z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast); + + return z; + } + void init_module() { using namespace boost::python; def("read_uint8", read_uint8, (arg("file"), arg("count"))); @@ -206,6 +224,9 @@ namespace dxtbx { namespace boost_python { def("is_big_endian", is_big_endian); def("uncompress", &uncompress, (arg_("packed"), arg_("slow"), arg_("fast"))); def("compress", &compress); + def("uncompress_rod_TY6", + &uncompress_rod_TY6, + (arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast"))); } BOOST_PYTHON_MODULE(dxtbx_ext) { diff --git a/src/dxtbx/format/FormatROD.py b/src/dxtbx/format/FormatROD.py index 417f99f1a..5ce6bd814 100644 --- a/src/dxtbx/format/FormatROD.py +++ b/src/dxtbx/format/FormatROD.py @@ -12,9 +12,7 @@ from __future__ import annotations __author__ = "David Waterman, Takanori Nakane" -__copyright__ = ( - "Copyright 2018 United Kingdom Research and Innovation & 2022 Takanori Nakane" -) +__copyright__ = "Copyright 2018-2023 United Kingdom Research and Innovation & 2022-2023 Takanori Nakane" __license__ = "BSD 3-clause" import re @@ -23,7 +21,9 @@ import numpy as np from scitbx.array_family import flex +from scitbx.math import r3_rotation_axis_and_angle_as_matrix +from dxtbx.ext import uncompress_rod_TY6 from dxtbx.format.Format import Format @@ -153,7 +153,7 @@ def _read_binary_header( f.seek(offset + general_nbytes + special_nbytes + 640) # detector rotation in degrees along e1, e2, e3 detector_rotns = struct.unpack("> 4) & 15) ipos += 1 - # ipos_bit = ipos * 8 for i in range(2): nbit = nbits[i] @@ -455,5 +488,12 @@ def decode_TY6_oneline(self, linedata, w): reader = FormatROD(arg) print(reader._txt_header) print(reader._bin_header) + print() + print( + "Starting angles in degrees (OMEGA, THETA, KAPPA=CHI, PHI, OMEGA PRIME, THETA PRIME)" + ) + print(reader._gonio_start_angles) + print("Ending angles in degrees") + print(reader._gonio_end_angles) else: print("Unsupported format")