Merge pull request #214 from DimitriPapadopoulos/unicode

Update Unicode handling for Python 3
holgern · Jun 27, 2023
commit fca95ce
2 parents: ac610b4 + 20fbc68

Showing 5 changed files with 10 additions and 40 deletions.
diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx
@@ -200,7 +200,7 @@ cdef class CyEdfReader:
         """
         open(file_name, annotations_mode, check_file_size)
         """
-        file_name_str = file_name.encode('utf8','strict')
+        file_name_str = file_name.encode('utf_8','strict')
         result = c_edf.edfopen_file_readonly(file_name_str, &self.hdr, annotations_mode, check_file_size)
 
         self.file_name = file_name
@@ -427,31 +427,6 @@ cdef class CyEdfReader:
 # low level functions
 
 
-
-cdef unicode _ustring(s):
-    if type(s) is unicode:
-        # fast path for most common case(s)
-        return <unicode>s
-    elif PY_MAJOR_VERSION < 3 and isinstance(s, bytes):
-        # only accept byte strings in Python 2.x, not in Py3
-        return (<bytes>s).decode('ascii')
-    elif isinstance(s, unicode):
-        # an evil cast to <unicode> might work here in some(!) cases,
-        # depending on what the further processing does.  to be safe,
-        # we can always create a copy instead
-        return unicode(s)
-    else:
-        raise TypeError()
-
-# define a global name for whatever char type is used in the module
-ctypedef unsigned char char_type
-
-cdef char_type[:] _chars(s):
-    if isinstance(s, unicode):
-        # encode to the specific encoding used inside of the module
-        s = (<unicode>s).encode('utf8')
-    return s
-
 def set_patientcode(int handle, char *patientcode):
     # check if rw?
     return c_edf.edf_set_patientcode(handle, patientcode)
@@ -529,8 +504,8 @@ def get_handle(file_number):
     return c_edf.edflib_get_handle(file_number)
 
 def is_file_used(path):
-    path_str = _ustring(path).encode('utf8','strict')
-    return c_edf.edflib_is_file_used(path_str)
+    path_byte = path.encode('utf_8','strict')
+    return c_edf.edflib_is_file_used(path_byte)
 
 # so you can use the same name if defining a python only function
 def set_physical_maximum(handle, edfsignal, phys_max):
@@ -556,7 +531,7 @@ def open_file_writeonly(path, filetype, number_of_signals):
             with open(path, 'wb'): pass
             path = get_short_path_name(path)
 
-    py_byte_string  = _ustring(path).encode('utf8','strict')
+    py_byte_string  = path.encode('utf_8','strict')
     cdef char* path_str = py_byte_string
     return c_edf.edfopen_file_writeonly(path_str, filetype, number_of_signals)
 

diff --git a/pyedflib/edfreader.py b/pyedflib/edfreader.py
@@ -126,7 +126,7 @@ def _convert_string(self,s):
         if isinstance(s, bytes):
             return s.decode("latin")
         else:
-            return s.decode("utf-8", "strict")
+            return s.decode("utf_8", "strict")
 
     def getHeader(self):
         """

diff --git a/pyedflib/edfwriter.py b/pyedflib/edfwriter.py
@@ -105,14 +105,14 @@ def check_signal_header_correct(channels, i, file_type):
 
 
 def u(x):
-    return x.decode("utf-8", "strict")
+    return x.decode("utf_8", "strict")
 
 
 def du(x):
     if isinstance(x, bytes):
         return x
     else:
-        return x.encode("utf-8")
+        return x.encode("utf_8")
 
 
 def isstr(s):
@@ -861,7 +861,7 @@ def writeSamples(self, data_list, digital = False):
                 if success<0:
                     raise OSError(f'Unknown error while calling writeSamples: {success}')
 
-    def writeAnnotation(self, onset_in_seconds, duration_in_seconds, description, str_format='utf-8'):
+    def writeAnnotation(self, onset_in_seconds, duration_in_seconds, description, str_format='utf_8'):
         """
         Writes an annotation/event to the file
         """
@@ -871,7 +871,7 @@ def writeAnnotation(self, onset_in_seconds, duration_in_seconds, description, st
         if isinstance(duration_in_seconds, bytes):
             duration_in_seconds = float(duration_in_seconds)
 
-        if str_format == 'utf-8':
+        if str_format == 'utf_8':
             if duration_in_seconds >= 0:
                 return write_annotation_utf8(self.handle, np.round(onset_in_seconds*10000).astype(np.int64), np.round(duration_in_seconds*10000).astype(int), du(description))
             else:

diff --git a/util/gh_lists.py b/util/gh_lists.py
@@ -119,7 +119,6 @@ def __init__(self, filename):
             self.cache = {}
 
     def get(self, url):
-        url = unicode(url)
         if url not in self.cache:
             print("[gh_lists] get:", url, file=sys.stderr)
             req = urlopen(url)

diff --git a/util/refguide_check.py b/util/refguide_check.py
@@ -312,11 +312,7 @@ def check_rest(module, names, dots=True):
     Returns: [(name, success_flag, output), ...]
     """
 
-    try:
-        skip_types = (dict, str, unicode, float, int)
-    except NameError:
-        # python 3
-        skip_types = (dict, str, float, int)
+    skip_types = (dict, str, float, int)
 
     results = []