diff --git a/newsfragments/653.feature b/newsfragments/653.feature
new file mode 100644
index 000000000..22c7d8c0e
--- /dev/null
+++ b/newsfragments/653.feature
@@ -0,0 +1 @@
+``FormatROD``: include support for multi-axis goniometers and faster decompression.
diff --git a/src/dxtbx/boost_python/compression.cc b/src/dxtbx/boost_python/compression.cc
index ff5d91c5f..6957d1bd1 100644
--- a/src/dxtbx/boost_python/compression.cc
+++ b/src/dxtbx/boost_python/compression.cc
@@ -164,3 +164,129 @@ unsigned int dxtbx::boost_python::cbf_decompress(const char *packed,
 
   return values - original;
 }
+
+inline uint32_t read_uint32_from_bytearray(const char *buf) {
+  // Assemble a little-endian 32-bit value from four bytes.
+  // `char` can be signed or unsigned depending on the platform. A negative
+  // `char` converted directly to a wider unsigned type becomes a huge number
+  // (the conversion is modulo 2^n), so the bytes are read as `unsigned char`.
+  // Each byte is widened to uint32_t before shifting; after the default promotion
+  // to `int`, shifting a byte >= 0x80 by 24 would run into the sign bit.
+  // Callers convert the result to signed types, which relies on two's
+  // complement; DIALS targets only such platforms and C++20 standardized it:
+  //    https://github.com/cctbx/dxtbx/issues/11#issuecomment-1657809645
+  //    https://stackoverflow.com/questions/54947427/going-from-signed-integers-to-unsigned-integers-and-vice-versa-in-c20
+  const unsigned char *b = reinterpret_cast<const unsigned char *>(buf);
+  return uint32_t(b[0]) | (uint32_t(b[1]) << 8) | (uint32_t(b[2]) << 16)
+         | (uint32_t(b[3]) << 24);
+}
+
+// Assemble a little-endian 16-bit value from two bytes.
+inline uint16_t read_uint16_from_bytearray(const char *buf) {
+  const unsigned char *b = reinterpret_cast<const unsigned char *>(buf);
+  return static_cast<uint16_t>(b[0] | (b[1] << 8));
+}
+
+// Expand an overflow marker: a code of 128 (long) or 127 (short) means the real
+// value follows at buf[ipos] as a 32- or 16-bit little-endian two's complement
+// integer, and ipos is advanced past it. Any other code is returned unchanged.
+static inline int rod_TY6_expand_overflow(int code, const char *buf, size_t &ipos) {
+  const int SHORT_OVERFLOW = 127;  // after 127 is subtracted
+  const int LONG_OVERFLOW = 128;
+
+  if (code == LONG_OVERFLOW) {
+    // The unsigned to signed conversions rely on two's complement,
+    // which is what DIALS targets (and what C++20 guarantees).
+    code = (signed int)read_uint32_from_bytearray(buf + ipos);
+    ipos += 4;
+  } else if (code == SHORT_OVERFLOW) {
+    code = (signed short)read_uint16_from_bytearray(buf + ipos);
+    ipos += 2;
+  }
+  return code;
+}
+
+// Decompress one TY6-compressed image (Rigaku Oxford Diffractometer).
+//   ret:         receives slow * fast pixel values, row after row
+//   buf_data:    packed pixel stream
+//   buf_offsets: for each row, a little-endian uint32 giving its start in buf_data
+//   slow, fast:  number of rows and number of pixels per row
+// A row holds its first pixel, then differences to the previous pixel: pairs of
+// bit-packed 8-pixel blocks, and finally the left-over pixels one byte each.
+// Nothing is written when slow or fast is not positive. The buffers carry no
+// length, so the caller is responsible for passing a complete stream.
+void dxtbx::boost_python::rod_TY6_decompress(int *const ret,
+                                             const char *const buf_data,
+                                             const char *const buf_offsets,
+                                             const int slow,
+                                             const int fast) {
+  const size_t BLOCKSIZE = 8;  // Codes below assume this is at most 8
+
+  // A non-positive size would wrap around in the unsigned arithmetic below.
+  if (slow <= 0 || fast <= 0) return;
+  const size_t nrow = slow;
+  const size_t ncol = fast;
+  const size_t nblock = (ncol - 1) / (BLOCKSIZE * 2);
+  const size_t nrest = (ncol - 1) % (BLOCKSIZE * 2);
+
+  for (size_t iy = 0; iy < nrow; iy++) {
+    size_t ipos = read_uint32_from_bytearray(buf_offsets + iy * sizeof(uint32_t));
+    size_t opos = ncol * iy;
+
+    // Values from -127 to +126 (inclusive) are stored with an offset of 127
+    // as 0 to 253. 254 and 255 mark short and long overflows.
+    // Other values ("overflows") are represented in two's complement.
+    const int firstpx = (unsigned char)buf_data[ipos++] - 127;
+    ret[opos++] = rod_TY6_expand_overflow(firstpx, buf_data, ipos);
+
+    // For every two blocks
+    for (size_t k = 0; k < nblock; k++) {
+      // One byte holds two bit widths: low nibble = 1st block, high = 2nd block.
+      const size_t bittypes = (unsigned char)buf_data[ipos++];
+      const size_t nbits[2] = {bittypes & 15, (bittypes >> 4) & 15};
+
+      // One pixel is stored using `nbit` bits.
+      // Although `nbit` itself is stored using 4 bits,
+      // only values 1 (0001b) to 8 (1000b) are allowed.
+      // Negative values are encoded as follows. (Not 2's complement!)
+      // - When nbit = 1, the pixel value is 0 or 1
+      // - When nbit = 2, the pixel value is -1, 0, 1, 2
+      // - When nbit = 3, the pixel value is -3, -2, -1, 0, 1, 2, 3, 4
+      // - When nbit = 8, the pixel value is -127, -126, ...,
+      //   127 (== short overflow), 128 (== long overflow)
+
+      // Load values
+      for (int i = 0; i < 2; i++) {
+        const size_t nbit = nbits[i];
+        assert(nbit <= 8);  // more would not fit in the 64-bit buffer below
+        const int zero_at = (nbit > 1) ? (1 << (nbit - 1)) - 1 : 0;
+
+        // Since nbit is at most 8, 8 * 8 (= BLOCKSIZE) = 64 bits are sufficient.
+        // Implicit promotion is only up to 32 bits, not 64 bits, so each byte is
+        // widened explicitly before it is shifted.
+        unsigned long long v = 0;
+        for (size_t j = 0; j < nbit; j++) {
+          const unsigned long long next_byte = (unsigned char)buf_data[ipos++];
+          v |= next_byte << (BLOCKSIZE * j);
+        }
+
+        const unsigned long long mask = (1ULL << nbit) - 1;
+        for (size_t j = 0; j < BLOCKSIZE; j++) {
+          ret[opos++] = (int)((v >> (nbit * j)) & mask) - zero_at;
+        }
+      }
+
+      // Apply differences. Load more values when overflown.
+      for (size_t i = opos - 2 * BLOCKSIZE; i < opos; i++) {
+        ret[i] = rod_TY6_expand_overflow(ret[i], buf_data, ipos) + ret[i - 1];
+      }
+    }
+
+    // The left-over pixels are stored like the first one, as differences.
+    for (size_t i = 0; i < nrest; i++) {
+      const int offset = (unsigned char)buf_data[ipos++] - 127;
+      ret[opos] = ret[opos - 1] + rod_TY6_expand_overflow(offset, buf_data, ipos);
+      opos++;
+    }
+  }
+}
diff --git a/src/dxtbx/boost_python/compression.h b/src/dxtbx/boost_python/compression.h
index d046b3965..b50b5e73f 100644
--- a/src/dxtbx/boost_python/compression.h
+++ b/src/dxtbx/boost_python/compression.h
@@ -6,6 +6,12 @@
 namespace dxtbx { namespace boost_python {
   unsigned int cbf_decompress(const char*, std::size_t, int*, const std::size_t);
   std::vector<char> cbf_compress(const int*, const std::size_t&);
+  // Decompress Rigaku Oxford diffractometer TY6 compression
+  void rod_TY6_decompress(int* const,         // output: slow * fast pixels
+                          const char* const,  // packed pixel data
+                          const char* const,  // per-row 32-bit offsets into the data
+                          const int,          // slow (number of rows)
+                          const int);         // fast (pixels per row)
 }}  // namespace dxtbx::boost_python
 
 #endif
diff --git a/src/dxtbx/boost_python/ext.cpp b/src/dxtbx/boost_python/ext.cpp
index 5273eb69e..70c69c06b 100644
--- a/src/dxtbx/boost_python/ext.cpp
+++ b/src/dxtbx/boost_python/ext.cpp
@@ -193,6 +193,24 @@ namespace dxtbx { namespace boost_python {
     return PyBytes_FromStringAndSize(&*packed.begin(), packed.size());
   }
 
+  // Python entry point to decompress Rigaku Oxford Diffractometer TY6 compression.
+  scitbx::af::flex_int uncompress_rod_TY6(const boost::python::object &data,
+                                          const boost::python::object &offsets,
+                                          const int &slow,
+                                          const int &fast) {
+    const std::string str_data = boost::python::extract<std::string>(data);
+    const std::string str_offsets = boost::python::extract<std::string>(offsets);
+    if (slow < 0 || fast < 1 || str_offsets.size() < 4 * std::size_t(slow)) {
+      PyErr_SetString(PyExc_ValueError, "uncompress_rod_TY6: bad slow/fast/offsets");
+      boost::python::throw_error_already_set();
+    }  // checked here because the decoder reads one 4-byte offset per row unchecked
+    scitbx::af::flex_int z((scitbx::af::flex_grid<>(slow, fast)),
+                           scitbx::af::init_functor_null<int>());
+    dxtbx::boost_python::rod_TY6_decompress(
+      z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast);
+    return z;
+  }
+
   void init_module() {
     using namespace boost::python;
     def("read_uint8", read_uint8, (arg("file"), arg("count")));
@@ -206,6 +224,9 @@ namespace dxtbx { namespace boost_python {
     def("is_big_endian", is_big_endian);
     def("uncompress", &uncompress, (arg_("packed"), arg_("slow"), arg_("fast")));
     def("compress", &compress);
+    def("uncompress_rod_TY6",
+        &uncompress_rod_TY6,
+        (arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast")));
   }
 
   BOOST_PYTHON_MODULE(dxtbx_ext) {
diff --git a/src/dxtbx/format/FormatROD.py b/src/dxtbx/format/FormatROD.py
index 417f99f1a..5ce6bd814 100644
--- a/src/dxtbx/format/FormatROD.py
+++ b/src/dxtbx/format/FormatROD.py
@@ -12,9 +12,7 @@
 from __future__ import annotations
 
 __author__ = "David Waterman, Takanori Nakane"
-__copyright__ = (
-    "Copyright 2018 United Kingdom Research and Innovation & 2022 Takanori Nakane"
-)
+__copyright__ = "Copyright 2018-2023 United Kingdom Research and Innovation & 2022-2023 Takanori Nakane"
 __license__ = "BSD 3-clause"
 
 import re
@@ -23,7 +21,9 @@
 import numpy as np
 
 from scitbx.array_family import flex
+from scitbx.math import r3_rotation_axis_and_angle_as_matrix
 
+from dxtbx.ext import uncompress_rod_TY6
 from dxtbx.format.Format import Format
 
 
@@ -153,7 +153,7 @@ def _read_binary_header(
             f.seek(offset + general_nbytes + special_nbytes + 640)
             # detector rotation in degrees along e1, e2, e3
             detector_rotns = struct.unpack("<ddd", f.read(24))
-            # FIXME: direct beam position when all angles are zero?
+            # direct beam position when all angles are zero (FIXME: not completely sure)
             origin_px_x, origin_px_y = struct.unpack("<dd", f.read(16))
             # alpha and beta are angles between KAPPA(=CHI) and THETA, and e3.
             angles_in_deg = struct.unpack(
@@ -198,24 +198,86 @@ def _start(self):
         self._txt_header = self._read_ascii_header(self._image_file)
         self._bin_header = self._read_binary_header(self._image_file)
 
+        self._gonio_start_angles = (
+            np.array(self._bin_header["start_angles_steps"])
+            * np.array(self._bin_header["step_to_rad"])
+            / np.pi
+            * 180
+        )
+        self._gonio_end_angles = (
+            np.array(self._bin_header["end_angles_steps"])
+            * np.array(self._bin_header["step_to_rad"])
+            / np.pi
+            * 180
+        )
+
+        self._scan_axis = -1
+        for axis in [0, 3]:  # 0 - OMEGA, 3 - PHI; the default is omega scan
+            if self._gonio_start_angles[axis] != self._gonio_end_angles[axis]:
+                self._scan_axis = axis
+                break
+
         return
 
-    # Rigaku/Oxford Geometry:
-    # - e3: parallel to OMEGA (and THETA)
-    # - e1: crystal to source
+    # Rigaku/Oxford coordinate system:
+    # - e3: parallel to OMEGA (and THETA), upwards
+    # - e1: along the beam, from the crystal to the source
     # - e2: completes the right handed coordinate system
+    #
+    # The detector origin is at the lower left corner looking from the crystal (or the source).
+    # - slow axis: (roughly) vertical and towards the ceiling
+    # - fast axis: (roughly) horizontal and towards right (at THETA = 0)
+    #
+    # Importantly, positive THETA, OMEGA, PHI rotations are CLOCKWISE looking down from the ceiling,
+    # while positive CHI=KAPPA rotation is CLOCKWISE looking from the detector at OMEGA=0, THETA=0.
+    # See specifications uploaded in https://github.com/cctbx/dxtbx/issues/11#issue-434741559.
+    #
+    # In this dxtbx coordinate system:
+    # - Z: along the beam, from the crystal to the source
+    # - Y: vertical, towards the ceiling
+    # - X: completes the right handed coordinate system
+    # Thus, Z = e1, Y = e3, X = e2.
+    #
+    # The coordinate system of XDS.INP generated by CrysAlisPro:
+    # - Z: along the beam, from the source to the crystal
+    # - Y: vertical, towards the ground
+    # - X: completes the right handed coordinate system
+    # Thus, Y and Z point in the opposite directions to those used by dxtbx/DIALS.
 
     def get_goniometer(self, index=None):
         return Format.get_goniometer(self)
 
     def _goniometer(self):
+        """Return a PHI / KAPPA=CHI / OMEGA multi-axis goniometer for the scan."""
+        if self._scan_axis == -1:
+            raise NotImplementedError("Still shots not implemented yet.")
+        # Position of the scan axis in `axes` below: ROD axis 0 (OMEGA) is the
+        # third entry and ROD axis 3 (PHI) the first.
+        dxtbx_scan_axis = {0: 2, 3: 0}.get(self._scan_axis)
+        if dxtbx_scan_axis is None:  # _start() only ever sets -1, 0 or 3
+            raise ValueError("Unexpected scan axis: {0}".format(self._scan_axis))
+
         # FIXME: sometimes XDS.INP generated by CrysAlisPro has
-        #  tiny deviationis from (0, 1, 0). I don't know how to calculate it.
-        # DIALS' third axis points to the opposite of XDS; thus the direction will be opposite.
-        direction = (0.0, -1.0, 0.0)
+        #  tiny deviations from (0, 1, 0). I don't know how to calculate it.
+        alpha_rad = self._bin_header["angles_in_deg"][0] * np.pi / 180
+        axes = flex.vec3_double(
+            ((0, -1, 0), (0, -np.cos(alpha_rad), np.sin(alpha_rad)), (0, -1, 0))
+        )
 
-        # FIXME: represent kappa axis in fixed_rotation
-        return self._goniometer_factory.known_axis(direction)
+        # Start angles in degrees, in the same PHI, KAPPA=CHI, OMEGA order as `axes`.
+        # The entry of the scan axis (angles[dxtbx_scan_axis]) is not used anyway.
+        angles = flex.double(
+            (
+                self._gonio_start_angles[3],
+                self._gonio_start_angles[2],
+                self._gonio_start_angles[0],
+            )
+        )
+        names = flex.std_string(("PHI", "KAPPA=CHI", "OMEGA"))
+
+        return self._goniometer_factory.make_multi_axis_goniometer(
+            axes, angles, names, dxtbx_scan_axis
+        )
 
     def get_beam(self, index=None):
         return Format.get_beam(self)
@@ -232,77 +294,49 @@ def get_detector(self, index=None):
         return Format.get_detector(self)
 
     def _detector(self):
-        gonio_angles = np.array(self._bin_header["start_angles_steps"]) * np.array(
-            self._bin_header["step_to_rad"]
-        )
+        theta_rad = self._gonio_start_angles[1] / 180 * np.pi  # THETA (deg -> rad)
         detector_rotns_rad = np.array(self._bin_header["detector_rotns"]) / 180 * np.pi
+
+        # Axes are in the dxtbx frame (X = e2, Y = e3, Z = e1). I don't know why only
+        # rot_e1 is clockwise but this matches DIRECTION_OF_DETECTOR_X-AXIS/Y-AXIS
+        # in XDS.INP from CrysAlisPro, whose Y and Z point opposite to ours.
         rot_e1 = np.array(
-            [
-                np.cos(detector_rotns_rad[0]),
-                np.sin(detector_rotns_rad[0]),
-                0,
-                -np.sin(detector_rotns_rad[0]),
-                np.cos(detector_rotns_rad[0]),
-                0,
-                0,
-                0,
-                1,
-            ]
-        ).reshape(3, 3)
+            r3_rotation_axis_and_angle_as_matrix([0, 0, 1], detector_rotns_rad[0])
+        ).reshape(
+            3, 3
+        )  # clockwise along e1 = Z
         rot_e2 = np.array(
-            [
-                1,
-                0,
-                0,
-                0,
-                np.cos(detector_rotns_rad[1]),
-                np.sin(detector_rotns_rad[1]),
-                0,
-                -np.sin(detector_rotns_rad[1]),
-                np.cos(detector_rotns_rad[1]),
-            ]
-        ).reshape(3, 3)
+            r3_rotation_axis_and_angle_as_matrix([-1, 0, 0], detector_rotns_rad[1])
+        ).reshape(
+            3, 3
+        )  # ANTI-clockwise along e2 = X
         rot_theta = np.array(
-            [
-                np.cos(gonio_angles[1]),
-                0,
-                np.sin(gonio_angles[1]),
-                0,
-                1,
-                0,
-                -np.sin(gonio_angles[1]),
-                0,
-                np.cos(gonio_angles[1]),
-            ]
-        ).reshape(3, 3)
+            r3_rotation_axis_and_angle_as_matrix([0, -1, 0], theta_rad)
+        ).reshape(
+            3, 3
+        )  # ANTI-clockwise along e3 = Y
         detector_axes = rot_theta.dot(rot_e2.dot(rot_e1))
-        # The third axis points to the opposite of XDS.
-        detector_axes[2, :] *= -1
 
         pixel_size_x = self._bin_header["real_px_size_x"]
         pixel_size_y = self._bin_header["real_px_size_y"]
         origin_at_zero = np.array(
             [
                 -self._bin_header["origin_px_x"] * pixel_size_x,
-                +self._bin_header["origin_px_y"] * pixel_size_y,
-                +self._bin_header["distance_mm"],
+                -self._bin_header["origin_px_y"] * pixel_size_y,
+                -self._bin_header["distance_mm"],
             ]
         )
-
-        # FIXME: this formula seem to give the right answer but I don't know why.
-        # XDS exporter in IPR's CrysAlisPro seems broken. It writes the same ORGX/Y
-        # regardless of the theta angles.
         origin = detector_axes.dot(origin_at_zero)
 
         detector = self._detector_factory.make_detector(
             "PAD",
             detector_axes[:, 0],
-            -detector_axes[:, 1],
+            detector_axes[:, 1],
             origin,
             (pixel_size_x, pixel_size_y),
             (self._txt_header["NX"], self._txt_header["NY"]),
             (0, self._bin_header["overflow_threshold"]),
-        )  # not sure about min
+        )
 
         return detector
 
@@ -312,21 +346,8 @@ def get_scan(self, index=None):
     def _scan(self):
         """Return the scan information for this image."""
 
-        for axis in [0, 3]:  # 0 - OMEGA, 3 - PHI: FIXME: is it always PHI scan?
-            start_angle = (
-                self._bin_header["start_angles_steps"][axis]
-                * self._bin_header["step_to_rad"][axis]
-                / np.pi
-                * 180
-            )
-            end_angle = (
-                self._bin_header["end_angles_steps"][axis]
-                * self._bin_header["step_to_rad"][axis]
-                / np.pi
-                * 180
-            )
-            if start_angle != end_angle:
-                break
+        start_angle = self._gonio_start_angles[self._scan_axis]
+        end_angle = self._gonio_end_angles[self._scan_axis]
 
         return self._scan_factory.single_file(
             filename=self._image_file,
@@ -339,10 +360,23 @@ def _scan(self):
     def get_raw_data(self):
         comp = self._txt_header["compression"].strip()
         if comp.startswith("TY6"):
-            return self._get_raw_data_ty6()
+            return self._get_raw_data_ty6_native()  # decoded by the C++ extension
         else:
             raise NotImplementedError("Can't handle compression: {0}".format(comp))
 
+    def _get_raw_data_ty6_native(self):
+        """Read the TY6-compressed pixel data and decode it with the C++ extension."""
+        nx, ny = self._txt_header["NX"], self._txt_header["NY"]
+        with open(self._image_file, "rb") as f:
+            f.seek(self._txt_header["NHEADER"])
+            nbytes = struct.unpack("<l", f.read(4))[0]  # size of the packed stream
+            linedata = f.read(nbytes)
+            offsets = f.read(4 * ny)  # one 4-byte offset per image row
+        if nbytes < 0 or len(linedata) != nbytes or len(offsets) != 4 * ny:
+            raise ValueError("Truncated TY6 data in {0}".format(self._image_file))
+        return uncompress_rod_TY6(linedata, offsets, ny, nx)
+
+    # Python implementation
     def _get_raw_data_ty6(self):
         offset = self._txt_header["NHEADER"]
         nx = self._txt_header["NX"]
@@ -391,7 +425,6 @@ def decode_TY6_oneline(self, linedata, w):
             bittype = linedata[ipos]
             nbits = (bittype & 15, (bittype >> 4) & 15)
             ipos += 1
-            # ipos_bit = ipos * 8
 
             for i in range(2):
                 nbit = nbits[i]
@@ -455,5 +488,12 @@ def decode_TY6_oneline(self, linedata, w):
             reader = FormatROD(arg)
             print(reader._txt_header)
             print(reader._bin_header)
+            print()
+            print(
+                "Starting angles in degrees (OMEGA, THETA, KAPPA=CHI, PHI, OMEGA PRIME, THETA PRIME)"
+            )
+            print(reader._gonio_start_angles)
+            print("Ending angles in degrees")
+            print(reader._gonio_end_angles)
         else:
             print("Unsupported format")