Skip to content

Commit

Permalink
FormatROD - Multi-axis goniometer, faster decompression (#653)
Browse files Browse the repository at this point in the history
Co-authored-by: Takanori Nakane <[email protected]>
  • Loading branch information
2 people authored and ndevenish committed Aug 3, 2023
1 parent ef5f9ab commit 5c27616
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 78 deletions.
1 change: 1 addition & 0 deletions newsfragments/653.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``FormatROD``: include support for multi-axis goniometers and faster decompression.
126 changes: 126 additions & 0 deletions src/dxtbx/boost_python/compression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,129 @@ unsigned int dxtbx::boost_python::cbf_decompress(const char *packed,

return values - original;
}

inline uint32_t read_uint32_from_bytearray(const char *buf) {
// `char` can be signed or unsigned depending on the platform.
// For bit shift operations, we need unsigned values.
// If `char` on the platform is signed, converting directly to "unsigned int" can
// produce huge numbers because modulo 2^n is taken by the integral conversion
// rules. Thus, we have to explicitly cast to `unsigned char` first.
// Then the automatic integral promotion converts them to `int`.
// Note that the unsigned to signed conversion is implementation-dependent
// and might not produce the intended result if two's complement is not used.
// Fortunately, DIALS targets only two's complement.
// https://github.com/cctbx/dxtbx/issues/11#issuecomment-1657809645
// Moreover, C++20 standarized this:
// https://stackoverflow.com/questions/54947427/going-from-signed-integers-to-unsigned-integers-and-vice-versa-in-c20

return ((unsigned char)buf[0]) | (((unsigned char)buf[1]) << 8)
| (((unsigned char)buf[2]) << 16) | (((unsigned char)buf[3]) << 24);
}

inline uint16_t read_uint16_from_bytearray(const char *buf) {
return ((unsigned char)buf[0]) | ((unsigned char)buf[1] << 8);
}

void dxtbx::boost_python::rod_TY6_decompress(int *const ret,
const char *const buf_data,
const char *const buf_offsets,
const int slow,
const int fast) {
const size_t BLOCKSIZE = 8; // Codes below assume this is at most 8
const signed int SHORT_OVERFLOW = 127; // after 127 is subtracted
const signed int LONG_OVERFLOW = 128;

const size_t nblock = (fast - 1) / (BLOCKSIZE * 2);
const size_t nrest = (fast - 1) % (BLOCKSIZE * 2);

for (size_t iy = 0; iy < slow; iy++) {
size_t ipos = read_uint32_from_bytearray(buf_offsets + iy * sizeof(uint32_t));
size_t opos = fast * iy;

// Values from -127 to +126 (inclusive) are stored with an offset of 127
// as 0 to 253. 254 and 255 mark short and long overflows.
// Other values ("overflows") are represented in two's complement.

int firstpx = (unsigned char)buf_data[ipos++] - 127;
if (firstpx == LONG_OVERFLOW) {
// See comments in read_uint32_from_bytearray() about
// the safety of the unsigned to signed conversion.
firstpx = (signed int)read_uint32_from_bytearray(buf_data + ipos);
ipos += 4;
} else if (firstpx == SHORT_OVERFLOW) {
firstpx = (signed short)read_uint16_from_bytearray(buf_data + ipos);
ipos += 2;
}
ret[opos++] = firstpx;

// For every two blocks
for (int k = 0; k < nblock; k++) {
const size_t bittypes = buf_data[ipos++];
const size_t nbits[2] = {bittypes & 15, (bittypes >> 4) & 15};

// One pixel is stored using `nbit` bits.
// Although `nbit` itself is stored using 4 bits,
// only values 1 (0001b) to 8 (1000b) are allowed.
// Negative values are encoded as follows. (Not 2's complement!)
// - When nbit = 1, the pixel value is 0 or 1
// - When nbit = 2, the pixel value is -1, 0, 1, 2
// - When nbit = 3, the pixel value is -3, -2, 1, 0, 1, 2, 3, 4
// - When nbit - 8, the pixel value is -127, -126, ...,
// 127 (== // SHORT_OVERFLOW), 128 (== LONG_OVERFLOW)

// Load values
for (int i = 0; i < 2; i++) {
const size_t nbit = nbits[i];
assert(nbit >= 0 && nbit <= 8);

int zero_at = 0;
if (nbit > 1) {
zero_at = (1 << (nbit - 1)) - 1;
}

// Since nbit is at most 8, 8 * 8 (= BLOCKSIZE) = 64 bits are sufficient.
unsigned long long v = 0;
for (int j = 0; j < nbit; j++) {
// Implicit promotion is only up to 32 bits, not 64 bits so we have to be
// explicit.
v |= (long long)((unsigned char)buf_data[ipos++]) << (BLOCKSIZE * j);
}

const unsigned long long mask = (1 << nbit) - 1;
for (int j = 0; j < BLOCKSIZE; j++) {
ret[opos++] = ((v >> (nbit * j)) & mask) - zero_at;
}
}

// Apply differences. Load more values when overflown.
for (size_t i = opos - 2 * BLOCKSIZE; i < opos; i++) {
int offset = ret[i];

if (offset == LONG_OVERFLOW) {
offset = (signed int)read_uint32_from_bytearray(buf_data + ipos);
ipos += 4;
} else if (offset == SHORT_OVERFLOW) {
offset = (signed short)read_uint16_from_bytearray(buf_data + ipos);
ipos += 2;
}

ret[i] = offset + ret[i - 1];
}
}

for (int i = 0; i < nrest; i++) {
int offset = (unsigned char)buf_data[ipos++] - 127;

if (offset == LONG_OVERFLOW) {
offset = (signed int)read_uint32_from_bytearray(buf_data + ipos);
ipos += 4;
} else if (offset == SHORT_OVERFLOW) {
offset = (signed short)read_uint16_from_bytearray(buf_data + ipos);
ipos += 2;
}

ret[opos] = ret[opos - 1] + offset;
opos++;
}
}
}
6 changes: 6 additions & 0 deletions src/dxtbx/boost_python/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
namespace dxtbx { namespace boost_python {
unsigned int cbf_decompress(const char*, std::size_t, int*, const std::size_t);
std::vector<char> cbf_compress(const int*, const std::size_t&);
// Decompress Rigaku Oxford diffractometer TY6 compression
void rod_TY6_decompress(int* const,
const char* const,
const char* const,
const int,
const int);
}} // namespace dxtbx::boost_python

#endif
21 changes: 21 additions & 0 deletions src/dxtbx/boost_python/ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,24 @@ namespace dxtbx { namespace boost_python {
return PyBytes_FromStringAndSize(&*packed.begin(), packed.size());
}

// Python entry point to decompress Rigaku Oxford Diffractometer TY6 compression
scitbx::af::flex_int uncompress_rod_TY6(const boost::python::object &data,
const boost::python::object &offsets,
const int &slow,
const int &fast) {
// Cannot I extract const char* directly?
std::string str_data = boost::python::extract<std::string>(data);
std::string str_offsets = boost::python::extract<std::string>(offsets);

scitbx::af::flex_int z((scitbx::af::flex_grid<>(slow, fast)),
scitbx::af::init_functor_null<int>());

dxtbx::boost_python::rod_TY6_decompress(
z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast);

return z;
}

void init_module() {
using namespace boost::python;
def("read_uint8", read_uint8, (arg("file"), arg("count")));
Expand All @@ -206,6 +224,9 @@ namespace dxtbx { namespace boost_python {
def("is_big_endian", is_big_endian);
def("uncompress", &uncompress, (arg_("packed"), arg_("slow"), arg_("fast")));
def("compress", &compress);
def("uncompress_rod_TY6",
&uncompress_rod_TY6,
(arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast")));
}

BOOST_PYTHON_MODULE(dxtbx_ext) {
Expand Down
Loading

0 comments on commit 5c27616

Please sign in to comment.