From 20fbc6812f9d5c761ea6790d7520504a1dc602c6 Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos
 <3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Mon, 5 Jun 2023 07:47:17 +0200
Subject: [PATCH] Get rid of more Python 2 Unicode-related code

- Get rid of unicode(). In Python 3, `unicode` is an alias of `str`.
  No need to cast a `str` to a `str`.

- Consistently use the base name `utf_8` for the UTF-8 codec.
  https://docs.python.org/3/library/codecs.html#standard-encodings

- Remove a piece of code copied from
  https://cython.readthedocs.io/en/latest/src/tutorial/strings.html
  Replace with the relevant code from the overhauled Python 3 doc:
  https://github.com/minrk/cython-docs/blob/master/src/tutorial/strings.rst
---
 pyedflib/_extensions/_pyedflib.pyx | 33 ++++--------------------------
 pyedflib/edfreader.py              |  2 +-
 pyedflib/edfwriter.py              |  8 ++++----
 util/gh_lists.py                   |  1 -
 util/refguide_check.py             |  6 +-----
 5 files changed, 10 insertions(+), 40 deletions(-)
diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx
index 033938f..f690cbe 100644
--- a/pyedflib/_extensions/_pyedflib.pyx
+++ b/pyedflib/_extensions/_pyedflib.pyx
@@ -200,7 +200,7 @@ cdef class CyEdfReader:
         """
         open(file_name, annotations_mode, check_file_size)
         """
-        file_name_str = file_name.encode('utf8','strict')
+        file_name_str = file_name.encode('utf_8','strict')
         result = c_edf.edfopen_file_readonly(file_name_str, &self.hdr, annotations_mode, check_file_size)
 
         self.file_name = file_name
@@ -427,31 +427,6 @@ cdef class CyEdfReader:
 # low level functions
 
 
-
-cdef unicode _ustring(s):
-    if type(s) is unicode:
-        # fast path for most common case(s)
-        return <unicode>s
-    elif PY_MAJOR_VERSION < 3 and isinstance(s, bytes):
-        # only accept byte strings in Python 2.x, not in Py3
-        return (<bytes>s).decode('ascii')
-    elif isinstance(s, unicode):
-        # an evil cast to <unicode> might work here in some(!) cases,
-        # depending on what the further processing does.  to be safe,
-        # we can always create a copy instead
-        return unicode(s)
-    else:
-        raise TypeError()
-
-# define a global name for whatever char type is used in the module
-ctypedef unsigned char char_type
-
-cdef char_type[:] _chars(s):
-    if isinstance(s, unicode):
-        # encode to the specific encoding used inside of the module
-        s = (<unicode>s).encode('utf8')
-    return s
-
 def set_patientcode(int handle, char *patientcode):
     # check if rw?
     return c_edf.edf_set_patientcode(handle, patientcode)
@@ -529,8 +504,8 @@ def get_handle(file_number):
     return c_edf.edflib_get_handle(file_number)
 
 def is_file_used(path):
-    path_str = _ustring(path).encode('utf8','strict')
-    return c_edf.edflib_is_file_used(path_str)
+    path_byte = path.encode('utf_8','strict')
+    return c_edf.edflib_is_file_used(path_byte)
 
 # so you can use the same name if defining a python only function
 def set_physical_maximum(handle, edfsignal, phys_max):
@@ -556,7 +531,7 @@ def open_file_writeonly(path, filetype, number_of_signals):
             with open(path, 'wb'): pass
             path = get_short_path_name(path)
 
-    py_byte_string  = _ustring(path).encode('utf8','strict')
+    py_byte_string  = path.encode('utf_8','strict')
     cdef char* path_str = py_byte_string
     return c_edf.edfopen_file_writeonly(path_str, filetype, number_of_signals)
 
diff --git a/pyedflib/edfreader.py b/pyedflib/edfreader.py
index 592e96d..2730f62 100644
--- a/pyedflib/edfreader.py
+++ b/pyedflib/edfreader.py
@@ -126,7 +126,7 @@ def _convert_string(self,s):
         if isinstance(s, bytes):
             return s.decode("latin")
         else:
-            return s.decode("utf-8", "strict")
+            return s.decode("utf_8", "strict")
 
     def getHeader(self):
         """
diff --git a/pyedflib/edfwriter.py b/pyedflib/edfwriter.py
index f0c4bf1..6d99b83 100644
--- a/pyedflib/edfwriter.py
+++ b/pyedflib/edfwriter.py
@@ -105,14 +105,14 @@ def check_signal_header_correct(channels, i, file_type):
 
 
 def u(x):
-    return x.decode("utf-8", "strict")
+    return x.decode("utf_8", "strict")
 
 
 def du(x):
     if isinstance(x, bytes):
         return x
     else:
-        return x.encode("utf-8")
+        return x.encode("utf_8")
 
 
 def isstr(s):
@@ -861,7 +861,7 @@ def writeSamples(self, data_list, digital = False):
                 if success<0:
                     raise OSError(f'Unknown error while calling writeSamples: {success}')
 
-    def writeAnnotation(self, onset_in_seconds, duration_in_seconds, description, str_format='utf-8'):
+    def writeAnnotation(self, onset_in_seconds, duration_in_seconds, description, str_format='utf_8'):
         """
         Writes an annotation/event to the file
         """
@@ -871,7 +871,7 @@ def writeAnnotation(self, onset_in_seconds, duration_in_seconds, description, st
         if isinstance(duration_in_seconds, bytes):
             duration_in_seconds = float(duration_in_seconds)
 
-        if str_format == 'utf-8':
+        if str_format == 'utf_8':
             if duration_in_seconds >= 0:
                 return write_annotation_utf8(self.handle, np.round(onset_in_seconds*10000).astype(np.int64), np.round(duration_in_seconds*10000).astype(int), du(description))
             else:
diff --git a/util/gh_lists.py b/util/gh_lists.py
index 68638ef..786ef8f 100644
--- a/util/gh_lists.py
+++ b/util/gh_lists.py
@@ -119,7 +119,6 @@ def __init__(self, filename):
             self.cache = {}
 
     def get(self, url):
-        url = unicode(url)
         if url not in self.cache:
             print("[gh_lists] get:", url, file=sys.stderr)
             req = urlopen(url)
diff --git a/util/refguide_check.py b/util/refguide_check.py
index f823790..fc932cc 100644
--- a/util/refguide_check.py
+++ b/util/refguide_check.py
@@ -312,11 +312,7 @@ def check_rest(module, names, dots=True):
     Returns: [(name, success_flag, output), ...]
     """
 
-    try:
-        skip_types = (dict, str, unicode, float, int)
-    except NameError:
-        # python 3
-        skip_types = (dict, str, float, int)
+    skip_types = (dict, str, float, int)
 
     results = []