refactor: ♻️ Refactor npTDMS
Refactoring npTDMS via:

1. https://github.com/sourcery-ai/sourcery
2. `autopep8 --in-place --aggressive --aggressive ./nptdms/**`
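The diffs below follow the rewrites these tools typically apply: `%`-style string formatting becomes f-strings, `dict(...)` over a generator becomes a dict comprehension, and `len(x) > 0` checks become truthiness tests. A minimal before/after sketch of these patterns (hypothetical example code, not taken from the npTDMS codebase):

```python
# Hypothetical before/after sketch (not npTDMS code) of the rewrite patterns
# applied throughout this commit.

class Obj:
    """Stand-in object with the attributes used below."""
    def __init__(self, path):
        self.path = path


# Before: percent-formatting, dict() over a generator, explicit len() check
def describe(obj, items):
    label = "%s(%s)" % (obj.__class__.__name__, obj.path)
    squares = dict((key, value * value) for (key, value) in items)
    if len(items) > 0:
        return label, squares
    return label, {}


# After: f-string, dict comprehension, truthiness check
def describe_refactored(obj, items):
    label = f"{obj.__class__.__name__}({obj.path})"
    squares = {key: value * value for (key, value) in items}
    return (label, squares) if items else (label, {})


if __name__ == "__main__":
    # Both versions produce the same result.
    print(describe(Obj("/'group'/'channel'"), [(1, 2), (3, 4)]))
    print(describe_refactored(Obj("/'group'/'channel'"), [(1, 2), (3, 4)]))
```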
Anselmoo committed Nov 22, 2022
1 parent 92d3573 commit f1f7050
Showing 25 changed files with 336 additions and 400 deletions.
3 changes: 1 addition & 2 deletions nptdms/base_segment.py
@@ -1,4 +1,3 @@
from io import UnsupportedOperation
import numpy as np

from nptdms.log import log_manager
@@ -32,7 +31,7 @@ def scaler_data_types(self):
return None

def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, self.path)
return f"{self.__class__.__name__}({self.path})"


class BaseDataReader(object):
10 changes: 4 additions & 6 deletions nptdms/channel_data.py
@@ -45,10 +45,7 @@ class ListDataReceiver(object):
def __init__(self, channel):
"""Initialise new data receiver for a TDMS object
"""
if channel.data_type == types.String:
self._dtype = np.dtype('O')
else:
self._dtype = None
self._dtype = np.dtype('O') if channel.data_type == types.String else None
self._data = []
self.scaler_data = {}

@@ -183,8 +180,9 @@ def slice_raw_data(raw_data, offset, length=None):
return raw_data
end = None if length is None else offset + length
data = None if raw_data.data is None else raw_data.data[offset:end]
scaler_data = dict(
(scale_id, scaler_data[offset:end]) for (scale_id, scaler_data) in raw_data.scaler_data.items())
scaler_data = {scale_id: scaler_data[offset:end] for (scale_id, scaler_data)
in raw_data.scaler_data.items()}

return RawDataSlice(data, scaler_data)


3 changes: 2 additions & 1 deletion nptdms/common.py
@@ -17,10 +17,11 @@ class ObjectPath(object):
:ivar group: Group name or None for the root object
:ivar channel: Channel name or None for the root object or a group object
"""

def __init__(self, *path_components):
self.group = None
self.channel = None
if len(path_components) > 0:
if path_components:
self.group = path_components[0]
if len(path_components) > 1:
self.channel = path_components[1]
88 changes: 40 additions & 48 deletions nptdms/daqmx.py
@@ -18,6 +18,7 @@
class DaqmxDataReader(BaseDataReader):
""" A TDMS segment with DAQmx data
"""

def _read_data_chunk(self, file, data_objects, chunk_index):
"""Read data from DAQmx data segment"""

@@ -40,7 +41,7 @@ def _read_data_chunk(self, file, data_objects, chunk_index):

# Now get arrays for each scaler of each channel where the scaler
# data comes from this raw buffer
for (i, obj) in enumerate(data_objects):
for obj in data_objects:
scalers_for_raw_buffer_index = [
scaler for scaler in obj.daqmx_metadata.scalers
if scaler.raw_buffer_index == raw_buffer_index]
@@ -60,9 +61,9 @@ def _read_data_chunk(self, file, data_objects, chunk_index):
else:
data[obj.path] = processed_data

combined_data = {}
for path, data in data.items():
combined_data[path] = RawChannelDataChunk.channel_data(data)
combined_data = {path: RawChannelDataChunk.channel_data(data)
for path, data in data.items()}

for path, data in scaler_data.items():
combined_data[path] = RawChannelDataChunk.scaler_data(data)
return RawDataChunk(combined_data)
@@ -92,10 +93,10 @@ def get_daqmx_final_chunk_lengths(ordered_objects, chunk_size_bytes):
for obj in ordered_objects:
if not obj.has_data:
continue
buffer_indices = list(set(s.raw_buffer_index for s in obj.daqmx_metadata.scalers))
buffer_indices = list({s.raw_buffer_index for s in obj.daqmx_metadata.scalers})
if len(buffer_indices) == 1:
object_lengths[obj.path] = updated_buffer_lengths[buffer_indices[0]]
# Else scalers are in different buffers, not sure this is even valid
# Else scalers are in different buffers, not sure this is even valid
return object_lengths


@@ -109,14 +110,12 @@ def get_buffer_dimensions(ordered_objects):
continue
daqmx_metadata = o.daqmx_metadata
if dimensions is None:
raw_data_widths = daqmx_metadata.raw_data_widths
# Set width for each buffer
dimensions = [(0, w) for w in raw_data_widths]
else:
if not _lists_are_equal(daqmx_metadata.raw_data_widths, raw_data_widths):
raise ValueError(
"Raw data widths for object %r (%s) do not match previous widths (%s)" %
(o, daqmx_metadata.raw_data_widths, raw_data_widths))
dimensions = [(0, w) for w in daqmx_metadata.raw_data_widths]
elif not _lists_are_equal(daqmx_metadata.raw_data_widths, raw_data_widths):
raise ValueError(
"Raw data widths for object %r (%s) do not match previous widths (%s)" %
(o, daqmx_metadata.raw_data_widths, raw_data_widths))
# Now set the buffer number of values based on the object chunk size
for scaler in daqmx_metadata.scalers:
buffer_index = scaler.raw_buffer_index
@@ -152,8 +151,8 @@ def read_raw_data_index(self, f, raw_data_index_header, endianness):
data_type_val = types.Uint32.read(f, endianness)
try:
self.data_type = types.tds_data_types[data_type_val]
except KeyError:
raise KeyError("Unrecognised data type: %s" % data_type_val)
except KeyError as e:
raise KeyError(f"Unrecognised data type: {data_type_val}") from e

daqmx_metadata = DaqMxMetadata(f, endianness, raw_data_index_header, self.data_type)
log.debug("DAQmx metadata: %r", daqmx_metadata)
@@ -166,9 +165,7 @@ def read_raw_data_index(self, f, raw_data_index_header, endianness):
def scaler_data_types(self):
if self.daqmx_metadata is None:
return None
return dict(
(s.scale_id, s.data_type)
for s in self.daqmx_metadata.scalers)
return {s.scale_id: s.data_type for s in self.daqmx_metadata.scalers}


class DaqMxMetadata(object):
@@ -179,17 +176,17 @@ class DaqMxMetadata(object):
'chunk_size',
'raw_data_widths',
'scalers',
]
]

def __init__(self, f, endianness, scaler_type, channel_data_type):
"""
Read the metadata for a DAQmx raw segment. This is the raw
DAQmx-specific portion of the raw data index.
"""
metadata_bytes = f.read(16)
(dimension,
self.chunk_size,
scaler_vector_length) = _struct_unpack(endianness + 'LQL', metadata_bytes)
(dimension, self.chunk_size, scaler_vector_length) = _struct_unpack(
f'{endianness}LQL', metadata_bytes
)

# In TDMS format version 2.0, 1 is the only valid value for dimension
if dimension != 1:
@@ -202,11 +199,17 @@ def __init__(self, f, endianness, scaler_type, channel_data_type):

if channel_data_type != types.DaqMxRawData:
if scaler_vector_length != 1:
raise ValueError("Expected only one scaler for channel with type %s" % channel_data_type.__name__)
raise ValueError(
"Expected only one scaler for channel"
f" with type {channel_data_type.__name__}"
)

if self.scalers[0].data_type != channel_data_type:
raise ValueError(
"Expected scaler data type to be %s but got %s" %
(channel_data_type.__name__, self.scalers[0].data_type.__name__))
"Expected scaler data type to be"
f" {channel_data_type.__name__} but got"
f" {self.scalers[0].data_type.__name__}"
)

# Read raw data widths.
# This is an array of widths in bytes, which should be the same
@@ -221,12 +224,10 @@ def __init__(self, f, endianness, scaler_type, channel_data_type):
def __repr__(self):
""" Return string representation of DAQmx metadata
"""
properties = (
"%s=%s" % (name, _get_attr_repr(self, name))
for name in self.__slots__)
properties = (f"{name}={_get_attr_repr(self, name)}" for name in self.__slots__)

properties_list = ", ".join(properties)
return "%s(%s)" % (self.__class__.__name__, properties_list)
return f"{self.__class__.__name__}({properties_list})"


class DaqMxScaler(object):
@@ -239,7 +240,7 @@ class DaqMxScaler(object):
'raw_buffer_index',
'raw_byte_offset',
'sample_format_bitmap',
]
]

def __init__(self, open_file, endianness):
scaler_bytes = open_file.read(20)
@@ -248,7 +249,7 @@ def __init__(self, open_file, endianness):
self.raw_buffer_index,
self.raw_byte_offset,
self.sample_format_bitmap,
self.scale_id) = _struct_unpack(endianness + 'LLLLL', scaler_bytes)
self.scale_id) = _struct_unpack(f'{endianness}LLLLL', scaler_bytes)

self.data_type = DAQMX_TYPES[data_type_code]

@@ -259,12 +260,10 @@ def postprocess_data(self, data):
return data

def __repr__(self):
properties = (
"%s=%s" % (name, _get_attr_repr(self, name))
for name in self.__slots__)
properties = (f"{name}={_get_attr_repr(self, name)}" for name in self.__slots__)

properties_list = ", ".join(properties)
return "%s(%s)" % (self.__class__.__name__, properties_list)
return f"{self.__class__.__name__}({properties_list})"


class DigitalLineScaler(object):
@@ -277,16 +276,13 @@
'raw_buffer_index',
'raw_bit_offset',
'sample_format_bitmap',
]
]

def __init__(self, open_file, endianness):
scaler_bytes = open_file.read(17)

(data_type_code,
self.raw_buffer_index,
self.raw_bit_offset,
self.sample_format_bitmap,
self.scale_id) = _struct_unpack(endianness + 'LLLBL', scaler_bytes)
(data_type_code, self.raw_buffer_index, self.raw_bit_offset, self.sample_format_bitmap,
self.scale_id) = _struct_unpack(f'{endianness}LLLBL', scaler_bytes)

self.data_type = DAQMX_TYPES[data_type_code]

@@ -299,19 +295,15 @@ def postprocess_data(self, data):
return np.right_shift(np.bitwise_and(data, bitmask), bit_offset)

def __repr__(self):
properties = (
"%s=%s" % (name, _get_attr_repr(self, name))
for name in self.__slots__)
properties = (f"{name}={_get_attr_repr(self, name)}" for name in self.__slots__)

properties_list = ", ".join(properties)
return "%s(%s)" % (self.__class__.__name__, properties_list)
return f"{self.__class__.__name__}({properties_list})"


def _get_attr_repr(obj, attr_name):
val = getattr(obj, attr_name)
if isinstance(val, type):
return val.__name__
return repr(val)
return val.__name__ if isinstance(val, type) else repr(val)


def _lists_are_equal(a, b):
2 changes: 1 addition & 1 deletion nptdms/export/hdf_export.py
@@ -43,7 +43,7 @@ def from_tdms_file(tdms_file, filepath, mode='w', group='/'):

# Write properties and data for each channel
for channel in group.channels():
channel_key = group.name + '/' + channel.name
channel_key = f'{group.name}/{channel.name}'

if channel.data_type is types.String:
# Encode as variable length UTF-8 strings
16 changes: 7 additions & 9 deletions nptdms/reader.py
@@ -65,7 +65,7 @@ def __init__(self, tdms_file):
self._file_path = source_path
self._file = open(self._file_path, "rb")

filepath = self._file_path + '_index'
filepath = f'{self._file_path}_index'
if os.path.isfile(filepath):
self._index_file_path = filepath
self._index_file = open(self._index_file_path, "rb")
@@ -142,8 +142,7 @@ def read_raw_data(self):
"Cannot read data unless metadata has first been read")
for segment in self._segments:
self._verify_segment_start(segment)
for chunk in segment.read_raw_data(self._file):
yield chunk
yield from segment.read_raw_data(self._file)

def read_raw_data_for_channel(self, channel_path, offset=0, length=None):
""" Read raw data for a single channel, chunk by chunk
@@ -299,7 +298,7 @@ def _read_lead_in(self, file, segment_position, is_index_file=False):
endianness = '>' if (toc_mask & toc_properties['kTocBigEndian']) else '<'

# Next four bytes are version number, then 8 bytes each for the offset values
(version, next_segment_offset, raw_data_offset) = _struct_unpack(endianness + 'lQQ', lead_in_bytes[8:28])
(version, next_segment_offset, raw_data_offset) = _struct_unpack(f'{endianness}lQQ', lead_in_bytes[8:28])

if self.tdms_version is None:
if version not in (4712, 4713):
@@ -328,7 +327,7 @@ def _read_lead_in(self, file, segment_position, is_index_file=False):
log.debug("Next segment offset = %d, raw data offset = %d, data size = %d b",
next_segment_offset, raw_data_offset, next_segment_offset - raw_data_offset)
next_segment_pos = (
segment_position + next_segment_offset + lead_size)
segment_position + next_segment_offset + lead_size)

return segment_position, toc_mask, data_position, next_segment_pos, segment_incomplete

@@ -464,6 +463,7 @@ def _update_object_scaler_data_types(path, obj, segment_object):
class ObjectMetadata(object):
""" Stores information about an object in a TDMS file
"""

def __init__(self):
self.properties = OrderedDict()
self.data_type = None
@@ -474,10 +474,8 @@ def __init__(self):
def _trim_channel_chunk(chunk, skip=0, trim=0):
if skip == 0 and trim == 0:
return chunk
data = None
scaler_data = None
if chunk.data is not None:
data = chunk.data[skip:len(chunk.data) - trim]
data = None if chunk.data is None else chunk.data[skip:len(chunk.data) - trim]
if chunk.scaler_data is not None:
scaler_data = {
scale_id: d[skip:len(d) - trim]
@@ -506,6 +504,6 @@ def _array_equal(a, b, chunk_size=100):
num_chunks = (len(a) + chunk_size - 1) // chunk_size
for i in range(num_chunks):
offset = i * chunk_size
if not (a[offset:offset+chunk_size] == b[offset:offset+chunk_size]).all():
if not (a[offset:offset + chunk_size] == b[offset:offset + chunk_size]).all():
return False
return True