use raw strings for regex

Changed in version 3.12: A backslash-character pair that is not a valid escape sequence now generates a SyntaxWarning, instead of DeprecationWarning. For example, re.compile("\d+\.\d+") now emits a SyntaxWarning ("\d" is an invalid escape sequence, use raw strings for regular expression: re.compile(r"\d+\.\d+")). In a future Python version, SyntaxError will eventually be raised, instead of SyntaxWarning. (Contributed by Victor Stinner in gh-98401.) Closes: llvm#97815 See-also: https://docs.python.org/3/reference/lexical_analysis.html#escape-sequences Signed-off-by: Paul Zander <[email protected]>
negril · Aug 21, 2024 · 7b861d5 · 7b861d5
1 parent 1193f7d
commit 7b861d5
Show file tree

Hide file tree

Showing 98 changed files with 325 additions and 325 deletions.
diff --git a/.github/workflows/version-check.py b/.github/workflows/version-check.py
@@ -6,7 +6,7 @@
 
 
 def get_version_from_tag(tag):
-    m = re.match("llvmorg-([0-9]+)\.([0-9]+)\.([0-9]+)(-rc[0-9]+)?$", tag)
+    m = re.match(r"llvmorg-([0-9]+)\.([0-9]+)\.([0-9]+)(-rc[0-9]+)?$", tag)
     if m:
         if m.lastindex == 4:
             # We have an rc tag.

diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py
@@ -101,7 +101,7 @@ def extract_result_types(comment):
 
 
 def strip_doxygen(comment):
-    """Returns the given comment without \-escaped words."""
+    r"""Returns the given comment without \-escaped words."""
     # If there is only a doxygen keyword in the line, delete the whole line.
     comment = re.sub(r"^\\[^\s]+\n", r"", comment, flags=re.M)
 
@@ -236,7 +236,7 @@ def act_on_decl(declaration, comment, allowed_types):
 
         # Parse the various matcher definition macros.
         m = re.match(
-            """.*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
+            r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
                        \s*([^\s,]+\s*),
                        \s*(?:[^\s,]+\s*),
                        \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)

diff --git a/clang/test/Analysis/check-analyzer-fixit.py b/clang/test/Analysis/check-analyzer-fixit.py
@@ -55,7 +55,7 @@ def run_test_once(args, extra_args):
     # themselves.  We need to keep the comments to preserve line numbers while
     # avoiding empty lines which could potentially trigger formatting-related
     # checks.
-    cleaned_test = re.sub("// *CHECK-[A-Z0-9\-]*:[^\r\n]*", "//", input_text)
+    cleaned_test = re.sub(r"// *CHECK-[A-Z0-9\-]*:[^\r\n]*", "//", input_text)
     write_file(temp_file_name, cleaned_test)
 
     original_file_name = temp_file_name + ".orig"

diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py
@@ -316,7 +316,7 @@ def symbolize(self, addr, binary, offset):
         #   * For C functions atos omits parentheses and argument types.
         #   * For C++ functions the function name (i.e., `foo` above) may contain
         #     templates which may contain parentheses.
-        match = re.match("^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
+        match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
         logging.debug("atos_line: %s", atos_line)
         if match:
             function_name = match.group(1)
@@ -541,7 +541,7 @@ def process_line_posix(self, line):
         # names in the regex because it could be an
         # Objective-C or C++ demangled name.
         stack_trace_line_format = (
-            "^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
+            r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
         )
         match = re.match(stack_trace_line_format, line)
         if not match:

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py
@@ -128,7 +128,7 @@ def get_address_object(address_name: str, offset: int = 0):
 
 
 def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
-    """Scan `line` for a string matching any key in `valid_commands`.
+    r"""Scan `line` for a string matching any key in `valid_commands`.
 
     Start searching from `start`.
     Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
@@ -543,7 +543,7 @@ def test_parse_share_line(self):
     def test_parse_escaped(self):
         """Escaped commands are ignored."""
 
-        lines = ['words \MockCmd("IGNORED") words words words\n']
+        lines = ['words \\MockCmd("IGNORED") words words words\n']
 
         values = self._find_all_mock_values_in_lines(lines)
 

diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
@@ -223,7 +223,7 @@ def can_target_host():
     xcode_lldb_vers = subprocess.check_output(["xcrun", "lldb", "--version"]).decode(
         "utf-8"
     )
-    match = re.search("lldb-(\d+)", xcode_lldb_vers)
+    match = re.search(r"lldb-(\d+)", xcode_lldb_vers)
     if match:
         apple_lldb_vers = int(match.group(1))
         if apple_lldb_vers < 1000:
@@ -247,7 +247,7 @@ def get_gdb_version_string():
     if len(gdb_vers_lines) < 1:
         print("Unkown GDB version format (too few lines)", file=sys.stderr)
         return None
-    match = re.search("GNU gdb \(.*?\) ((\d|\.)+)", gdb_vers_lines[0].strip())
+    match = re.search(r"GNU gdb \(.*?\) ((\d|\.)+)", gdb_vers_lines[0].strip())
     if match is None:
         print(f"Unkown GDB version format: {gdb_vers_lines[0]}", file=sys.stderr)
         return None
@@ -261,7 +261,7 @@ def get_clang_default_dwarf_version_string(triple):
     # Get the flags passed by the driver and look for -dwarf-version.
     cmd = f'{llvm_config.use_llvm_tool("clang")} -g -xc  -c - -v -### --target={triple}'
     stderr = subprocess.run(cmd.split(), stderr=subprocess.PIPE).stderr.decode()
-    match = re.search("-dwarf-version=(\d+)", stderr)
+    match = re.search(r"-dwarf-version=(\d+)", stderr)
     if match is None:
         print("Cannot determine default dwarf version", file=sys.stderr)
         return None

diff --git a/libcxx/utils/synchronize_csv_status_files.py b/libcxx/utils/synchronize_csv_status_files.py
@@ -284,7 +284,7 @@ def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
             results.append(gh.for_printing())
             continue
         elif paper.status != gh.status:
-            print(f"We found a CSV row and a Github issue with different statuses:\nrow: {row}\Github issue: {gh}")
+            print(f"We found a CSV row and a Github issue with different statuses:\nrow: {row}\nGithub issue: {gh}")
         results.append(row)
 
     return results

diff --git a/lld/test/MachO/tools/validate-unwind-info.py b/lld/test/MachO/tools/validate-unwind-info.py
@@ -11,7 +11,7 @@
 
 
 def main():
-    hex = "[a-f\d]"
+    hex = r"[a-f\d]"
     hex8 = hex + "{8}"
 
     parser = argparse.ArgumentParser(description=__doc__)

diff --git a/lld/utils/benchmark.py b/lld/utils/benchmark.py
@@ -51,7 +51,7 @@ def __str__(self):
 def getBenchmarks():
     ret = []
     for i in glob.glob("*/response*.txt"):
-        m = re.match("response-(.*)\.txt", os.path.basename(i))
+        m = re.match(r"response-(.*)\.txt", os.path.basename(i))
         variant = m.groups()[0] if m else None
         ret.append(Bench(os.path.dirname(i), variant))
     return ret

diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
@@ -296,7 +296,7 @@ class DarwinImage(symbolication.Image):
             except:
                 dsymForUUIDBinary = ""
 
-        dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*")
+        dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*")
 
         def __init__(
             self, text_addr_lo, text_addr_hi, identifier, version, uuid, path, verbose
@@ -501,7 +501,7 @@ def find_image_with_identifier(self, identifier):
         for image in self.images:
             if image.identifier == identifier:
                 return image
-        regex_text = "^.*\.%s$" % (re.escape(identifier))
+        regex_text = r"^.*\.%s$" % (re.escape(identifier))
         regex = re.compile(regex_text)
         for image in self.images:
             if regex.match(image.identifier):
@@ -925,7 +925,7 @@ def get(cls):
             version = r"(?:" + super().version + r"\s+)?"
             address = r"(0x[0-9a-fA-F]{4,})"  # 4 digits or more
 
-            symbol = """
+            symbol = r"""
                         (?:
                             [ ]+
                             (?P<symbol>.+)
@@ -1095,7 +1095,7 @@ def parse_normal(self, line):
             self.crashlog.process_identifier = line[11:].strip()
         elif line.startswith("Version:"):
             version_string = line[8:].strip()
-            matched_pair = re.search("(.+)\((.+)\)", version_string)
+            matched_pair = re.search(r"(.+)\((.+)\)", version_string)
             if matched_pair:
                 self.crashlog.process_version = matched_pair.group(1)
                 self.crashlog.process_compatability_version = matched_pair.group(2)

diff --git a/lldb/examples/python/delta.py b/lldb/examples/python/delta.py
@@ -99,7 +99,7 @@ def parse_log_file(file, options):
     print("# Log file: '%s'" % file)
     print("#----------------------------------------------------------------------")
 
-    timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
+    timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
 
     base_time = 0.0
     last_time = 0.0

diff --git a/lldb/examples/python/gdbremote.py b/lldb/examples/python/gdbremote.py
@@ -1537,13 +1537,13 @@ def parse_gdb_log(file, options):
     a long time during a preset set of debugger commands."""
 
     tricky_commands = ["qRegisterInfo"]
-    timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
+    timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
     packet_name_regex = re.compile("([A-Za-z_]+)[^a-z]")
     packet_transmit_name_regex = re.compile(
         "(?P<direction>send|read) packet: (?P<packet>.*)"
     )
-    packet_contents_name_regex = re.compile("\$([^#]*)#[0-9a-fA-F]{2}")
-    packet_checksum_regex = re.compile(".*#[0-9a-fA-F]{2}$")
+    packet_contents_name_regex = re.compile(r"\$([^#]*)#[0-9a-fA-F]{2}")
+    packet_checksum_regex = re.compile(r".*#[0-9a-fA-F]{2}$")
     packet_names_regex_str = "(" + "|".join(gdb_remote_commands.keys()) + ")(.*)"
     packet_names_regex = re.compile(packet_names_regex_str)
 

diff --git a/lldb/examples/python/jump.py b/lldb/examples/python/jump.py
@@ -38,7 +38,7 @@ def parse_linespec(linespec, frame, result):
             )
 
     if not matched:
-        mo = re.match("^\+([0-9]+)$", linespec)
+        mo = re.match(r"^\+([0-9]+)$", linespec)
         if mo is not None:
             matched = True
             # print "Matched +<count>"
@@ -54,7 +54,7 @@ def parse_linespec(linespec, frame, result):
             )
 
     if not matched:
-        mo = re.match("^\-([0-9]+)$", linespec)
+        mo = re.match(r"^\-([0-9]+)$", linespec)
         if mo is not None:
             matched = True
             # print "Matched -<count>"
@@ -79,7 +79,7 @@ def parse_linespec(linespec, frame, result):
             breakpoint = target.BreakpointCreateByLocation(file_name, line_number)
 
     if not matched:
-        mo = re.match("\*((0x)?([0-9a-f]+))$", linespec)
+        mo = re.match(r"\*((0x)?([0-9a-f]+))$", linespec)
         if mo is not None:
             matched = True
             # print "Matched <address-expression>"

diff --git a/lldb/examples/python/performance.py b/lldb/examples/python/performance.py
@@ -346,7 +346,7 @@ def __init__(self, pid):
 
     def Measure(self):
         output = subprocess.getoutput(self.command).split("\n")[-1]
-        values = re.split("[-+\s]+", output)
+        values = re.split(r"[-+\s]+", output)
         for idx, stat in enumerate(values):
             multiplier = 1
             if stat:

diff --git a/lldb/examples/python/symbolication.py b/lldb/examples/python/symbolication.py
@@ -177,9 +177,9 @@ class Section:
     """Class that represents an load address range"""
 
     sect_info_regex = re.compile("(?P<name>[^=]+)=(?P<range>.*)")
-    addr_regex = re.compile("^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$")
+    addr_regex = re.compile(r"^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$")
     range_regex = re.compile(
-        "^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$"
+        r"^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$"
     )
 
     def __init__(self, start_addr=None, end_addr=None, name=None):
@@ -557,7 +557,7 @@ def find_images_with_identifier(self, identifier):
             if image.identifier == identifier:
                 images.append(image)
         if len(images) == 0:
-            regex_text = "^.*\.%s$" % (re.escape(identifier))
+            regex_text = r"^.*\.%s$" % (re.escape(identifier))
             regex = re.compile(regex_text)
             for image in self.images:
                 if regex.match(image.identifier):

diff --git a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py
@@ -104,4 +104,4 @@ def cursor_forward_escape_seq(self, chars_to_move):
         Returns the escape sequence to move the cursor forward/right
         by a certain amount of characters.
         """
-        return b"\x1b\[" + str(chars_to_move).encode("utf-8") + b"C"
+        return b"\x1b\\[" + str(chars_to_move).encode("utf-8") + b"C"
diff --git a/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py b/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py
@@ -91,7 +91,7 @@ def timeout_to_seconds(timeout):
 
 
 class ProcessHelper(object):
-    """Provides an interface for accessing process-related functionality.
+    r"""Provides an interface for accessing process-related functionality.
 
     This class provides a factory method that gives the caller a
     platform-specific implementation instance of the class.

diff --git a/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py b/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py
@@ -20,15 +20,15 @@ def test_backticks_in_alias(self):
         interp = self.dbg.GetCommandInterpreter()
         result = lldb.SBCommandReturnObject()
         interp.HandleCommand(
-            "command alias _test-argv-cmd expression -Z \`argc\` -- argv", result
+            r"command alias _test-argv-cmd expression -Z \`argc\` -- argv", result
         )
         self.assertCommandReturn(result, "Made the alias")
         interp.HandleCommand("_test-argv-cmd", result)
         self.assertCommandReturn(result, "The alias worked")
 
         # Now try a harder case where we create this using an alias:
         interp.HandleCommand(
-            "command alias _test-argv-parray-cmd parray \`argc\` argv", result
+            r"command alias _test-argv-parray-cmd parray \`argc\` argv", result
         )
         self.assertCommandReturn(result, "Made the alias")
         interp.HandleCommand("_test-argv-parray-cmd", result)

diff --git a/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py b/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py
@@ -30,7 +30,7 @@ def test(self):
         alloc0 = re.search("^.*IRMemoryMap::Malloc.+?0xdead0000.*$", log, re.MULTILINE)
         # Malloc adds additional bytes to allocation size, hence 10007
         alloc1 = re.search(
-            "^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE
+            r"^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE
         )
         self.assertTrue(alloc0, "Couldn't find an allocation at a given address.")
         self.assertTrue(

diff --git a/lldb/test/API/commands/expression/test/TestExprs.py b/lldb/test/API/commands/expression/test/TestExprs.py
@@ -50,7 +50,7 @@ def build_and_run(self):
     def test_floating_point_expr_commands(self):
         self.build_and_run()
 
-        self.expect("expression 2.234f", patterns=["\(float\) \$.* = 2\.234"])
+        self.expect("expression 2.234f", patterns=[r"\(float\) \$.* = 2\.234"])
         # (float) $2 = 2.234
 
     def test_many_expr_commands(self):

diff --git a/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py b/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py
@@ -48,7 +48,7 @@ def test_gui(self):
         self.child.expect_exact("Threads")
 
         # The main thread should be expanded.
-        self.child.expect("#\d+: main")
+        self.child.expect(r"#\d+: main")
 
         # Quit the GUI
         self.child.send(escape_key)

diff --git a/lldb/test/API/commands/help/TestHelp.py b/lldb/test/API/commands/help/TestHelp.py
@@ -349,13 +349,13 @@ def test_help_show_tags(self):
         self.expect(
             "help memory read",
             patterns=[
-                "--show-tags\n\s+Include memory tags in output "
-                "\(does not apply to binary output\)."
+                r"--show-tags\n\s+Include memory tags in output "
+                r"\(does not apply to binary output\)."
             ],
         )
         self.expect(
             "help memory find",
-            patterns=["--show-tags\n\s+Include memory tags in output."],
+            patterns=[r"--show-tags\n\s+Include memory tags in output."],
         )
 
     @no_debug_info_test

diff --git a/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py b/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py
@@ -93,7 +93,7 @@ def test(self):
 
         self.runCmd("process kill")
 
-        self.runCmd("process launch -X true -w %s -- foo\ bar" % (self.getBuildDir()))
+        self.runCmd(r"process launch -X true -w %s -- foo\ bar" % (self.getBuildDir()))
 
         process = self.process()
 

diff --git a/lldb/test/API/commands/register/register/TestRegistersUnavailable.py b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py
@@ -48,12 +48,12 @@ def test_unavailable_registers(self):
             "register read --all",
             patterns=[
                 "(?sm)^general purpose registers:\n"
-                "^\s+rdx = 0x5555555555555555\n"
+                r"^\s+rdx = 0x5555555555555555\n"
                 ".*"
                 "^3 registers were unavailable.\n"
                 "\n"
                 "^supplementary registers:\n"
-                "^\s+edx = 0x55555555\n"
+                r"^\s+edx = 0x55555555\n"
                 ".*"
                 "^12 registers were unavailable."
             ],

diff --git a/lldb/test/API/commands/register/register/register_command/TestRegisters.py b/lldb/test/API/commands/register/register/register_command/TestRegisters.py
@@ -630,14 +630,14 @@ def test_register_read_fields(self):
         # N/Z/C/V bits will always be present, so check only for those.
         self.expect(
             "register read cpsr",
-            patterns=["= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"],
+            patterns=[r"= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"],
         )
         self.expect(
-            "register read fpsr", patterns=["= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"]
+            "register read fpsr", patterns=[r"= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"]
         )
         # AHP/DN/FZ always present, others may vary.
         self.expect(
-            "register read fpcr", patterns=["= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"]
+            "register read fpcr", patterns=[r"= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"]
         )
 
         # Should get enumerator descriptions for RMode.