diff --git a/babel/messages/extract.py b/babel/messages/extract.py
index 8d4bbeaf8..7650cfe1e 100644
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -502,14 +502,6 @@ def extract_python(
     :param options: a dictionary of additional options (optional)
     :rtype: ``iterator``
     """
-    funcname = lineno = message_lineno = None
-    call_stack = -1
-    buf = []
-    messages = []
-    translator_comments = []
-    in_def = in_translator_comments = False
-    comment_tag = None
-
     encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8')
     future_flags = parse_future_flags(fileobj, encoding)
     next_line = lambda: fileobj.readline().decode(encoding)
@@ -520,103 +512,145 @@ def extract_python(
     # currently parsing one.
     current_fstring_start = None
 
-    for tok, value, (lineno, _), _, _ in tokens:
-        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
+    # Keep a stack of all open function calls and their related contextual
+    # variables, so we can handle nested gettext calls.
+    function_stack = []
+    # Keep the last encountered NAME token; it is treated as the function
+    # name when we encounter an opening parenthesis
+    last_function_name = None
+    # Keep track of whether we're in a class or function definition
+    in_def = False
+    # Keep track of whether we're in a block of translator comments
+    in_translator_comments = False
+    # Keep track of the last encountered translator comments
+    translator_comments = []
+    # Keep track of the (split) strings encountered
+    message_buffer = []
+
+    for token, value, (line_no, _), _, _ in tokens:
+        if not function_stack and token == NAME and value in ('def', 'class'):
+            # We're entering a class or function definition
             in_def = True
-        elif tok == OP and value == '(':
-            if in_def:
-                # Avoid false positives for declarations such as:
-                # def gettext(arg='message'):
-                in_def = False
-                continue
-            if funcname:
-                message_lineno = lineno
-                call_stack += 1
-        elif in_def and tok == OP and value == ':':
-            # End of a class definition without parens
+
+        elif in_def and token == OP and value in ('(', ':'):
+            # We're in a class or function definition and should not do anything
             in_def = False
             continue
-        elif call_stack == -1 and tok == COMMENT:
+
+        elif token == OP and value == '(' and last_function_name:
+            # We're entering a function call
+            cur_translator_comments = translator_comments
+            if function_stack and function_stack[-1]['function_line_no'] == line_no:
+                # If the current call starts on the same line as the enclosing call,
+                # reuse that call's translator comments, since they also apply to us.
+                cur_translator_comments = function_stack[-1]['translator_comments']
+
+            # We add all information needed later for the current function call
+            function_stack.append({
+                'function_line_no': line_no,
+                'function_name': last_function_name,
+                'message_line_no': None,
+                'messages': [],
+                'translator_comments': cur_translator_comments,
+            })
+            translator_comments = []
+
+        elif token == COMMENT:
             # Strip the comment token from the line
             value = value[1:].strip()
-            if in_translator_comments and \
-                    translator_comments[-1][0] == lineno - 1:
+            if in_translator_comments and translator_comments[-1][0] == line_no - 1:
                 # We're already inside a translator comment, continue appending
-                translator_comments.append((lineno, value))
+                translator_comments.append((line_no, value))
                 continue
-            # If execution reaches this point, let's see if comment line
-            # starts with one of the comment tags
+
             for comment_tag in comment_tags:
                 if value.startswith(comment_tag):
+                    # Comment starts with one of the comment tags,
+                    # so let's start capturing it
                     in_translator_comments = True
-                    translator_comments.append((lineno, value))
+                    translator_comments.append((line_no, value))
                     break
-        elif funcname and call_stack == 0:
-            nested = (tok == NAME and value in keywords)
-            if (tok == OP and value == ')') or nested:
-                if buf:
-                    messages.append(''.join(buf))
-                    del buf[:]
+
+        elif function_stack and function_stack[-1]['function_name'] in keywords:
+            # We're inside a translation function call
+            if token == OP and value == ')':
+                # The call has ended, so we yield the translatable term(s)
+                messages = function_stack[-1]['messages']
+                line_no = (
+                    function_stack[-1]['message_line_no']
+                    or function_stack[-1]['function_line_no']
+                )
+                cur_translator_comments = function_stack[-1]['translator_comments']
+
+                if message_buffer:
+                    messages.append(''.join(message_buffer))
+                    message_buffer.clear()
                 else:
                     messages.append(None)
 
                 messages = tuple(messages) if len(messages) > 1 else messages[0]
-                # Comments don't apply unless they immediately
-                # precede the message
-                if translator_comments and \
-                        translator_comments[-1][0] < message_lineno - 1:
-                    translator_comments = []
-
-                yield (message_lineno, funcname, messages,
-                       [comment[1] for comment in translator_comments])
-
-                funcname = lineno = message_lineno = None
-                call_stack = -1
-                messages = []
-                translator_comments = []
-                in_translator_comments = False
-                if nested:
-                    funcname = value
-            elif tok == STRING:
-                val = _parse_python_string(value, encoding, future_flags)
-                if val is not None:
-                    buf.append(val)
+                if (
+                    cur_translator_comments
+                    and cur_translator_comments[-1][0] < line_no - 1
+                ):
+                    # The translator comments are not immediately preceding the current
+                    # term, so we skip them.
+                    cur_translator_comments = []
+
+                yield (
+                    line_no,
+                    function_stack[-1]['function_name'],
+                    messages,
+                    [comment[1] for comment in cur_translator_comments],
+                )
+
+                function_stack.pop()
+
+            elif token == STRING:
+                # We've encountered a string inside a translation function call
+                string_value = _parse_python_string(value, encoding, future_flags)
+                if not function_stack[-1]['message_line_no']:
+                    function_stack[-1]['message_line_no'] = line_no
+                if string_value is not None:
+                    message_buffer.append(string_value)
 
             # Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
-            elif tok == FSTRING_START:
+            elif token == FSTRING_START:
                 current_fstring_start = value
-            elif tok == FSTRING_MIDDLE:
+            elif token == FSTRING_MIDDLE:
                 if current_fstring_start is not None:
                     current_fstring_start += value
-            elif tok == FSTRING_END:
+            elif token == FSTRING_END:
                 if current_fstring_start is not None:
                     fstring = current_fstring_start + value
-                    val = _parse_python_string(fstring, encoding, future_flags)
-                    if val is not None:
-                        buf.append(val)
-
-            elif tok == OP and value == ',':
-                if buf:
-                    messages.append(''.join(buf))
-                    del buf[:]
+                    string_value = _parse_python_string(fstring, encoding, future_flags)
+                    if string_value is not None:
+                        message_buffer.append(string_value)
+
+            elif token == OP and value == ',':
+                # End of a function call argument
+                if message_buffer:
+                    function_stack[-1]['messages'].append(''.join(message_buffer))
+                    message_buffer.clear()
                 else:
-                    messages.append(None)
-                if translator_comments:
-                    # We have translator comments, and since we're on a
-                    # comma(,) user is allowed to break into a new line
-                    # Let's increase the last comment's lineno in order
-                    # for the comment to still be a valid one
-                    old_lineno, old_comment = translator_comments.pop()
-                    translator_comments.append((old_lineno + 1, old_comment))
-        elif call_stack > 0 and tok == OP and value == ')':
-            call_stack -= 1
-        elif funcname and call_stack == -1:
-            funcname = None
-        elif tok == NAME and value in keywords:
-            funcname = value
+                    function_stack[-1]['messages'].append(None)
+
+        elif function_stack and token == OP and value == ')':
+            function_stack.pop()
+
+        if in_translator_comments and translator_comments[-1][0] < line_no:
+            # We have a newline in between the comments, so they don't belong
+            # together anymore
+            in_translator_comments = False
+
+        if token == NAME:
+            last_function_name = value
+            if function_stack and not function_stack[-1]['message_line_no']:
+                function_stack[-1]['message_line_no'] = line_no
 
-        if (current_fstring_start is not None
-            and tok not in {FSTRING_START, FSTRING_MIDDLE}
+        if (
+            current_fstring_start is not None
+            and token not in {FSTRING_START, FSTRING_MIDDLE}
         ):
             # In Python 3.12, tokens other than FSTRING_* mean the
             # f-string is dynamic, so we don't wan't to extract it.
diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py
index 7d3a05aa7..9b6348813 100644
--- a/tests/messages/test_extract.py
+++ b/tests/messages/test_extract.py
@@ -97,10 +97,10 @@ def test_comments_with_calls_that_spawn_multiple_lines(self):
         messages = list(extract.extract_python(buf, ('ngettext', '_'), ['NOTE:'],
 
                                                {'strip_comment_tags': False}))
-        assert messages[0] == (3, 'ngettext', ('Catalog deleted.', 'Catalogs deleted.', None), ['NOTE: This Comment SHOULD Be Extracted'])
+        assert messages[0] == (2, 'ngettext', ('Catalog deleted.', 'Catalogs deleted.', None), ['NOTE: This Comment SHOULD Be Extracted'])
         assert messages[1] == (6, '_', 'Locale deleted.', ['NOTE: This Comment SHOULD Be Extracted'])
         assert messages[2] == (10, 'ngettext', ('Foo deleted.', 'Foos deleted.', None), ['NOTE: This Comment SHOULD Be Extracted'])
-        assert messages[3] == (15, 'ngettext', ('Bar deleted.', 'Bars deleted.', None), ['NOTE: This Comment SHOULD Be Extracted', 'NOTE: And This One Too'])
+        assert messages[3] == (14, 'ngettext', ('Bar deleted.', 'Bars deleted.', None), ['NOTE: This Comment SHOULD Be Extracted', 'NOTE: And This One Too'])
 
     def test_declarations(self):
         buf = BytesIO(b"""\
@@ -422,24 +422,44 @@ def test_nested_messages(self):
 # NOTE: Third
 _(u'Hello, {0} and {1}!', _(u'Heungsub'),
   _(u'Armin'))
+
+# NOTE: Fourth
+_("Hello %(person)", person=random_function(_("Person")))
+
+# NOTE: Fifth
+_("Hello %(people)",
+    person=random_function(
+        ", ".join([_("Person 1"), _("Person 2")])
+    )
+)
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
-        assert messages[0][2] == ('Hello, {name}!', None)
+        assert messages[0][2] == 'Foo Bar'
         assert messages[0][3] == ['NOTE: First']
-        assert messages[1][2] == 'Foo Bar'
-        assert messages[1][3] == []
-        assert messages[2][2] == ('Hello, {name1} and {name2}!', None)
+        assert messages[1][2] == ('Hello, {name}!', None)
+        assert messages[1][3] == ['NOTE: First']
+        assert messages[2][2] == 'Heungsub'
         assert messages[2][3] == ['NOTE: Second']
-        assert messages[3][2] == 'Heungsub'
+        assert messages[3][2] == 'Armin'
         assert messages[3][3] == []
-        assert messages[4][2] == 'Armin'
-        assert messages[4][3] == []
-        assert messages[5][2] == ('Hello, {0} and {1}!', None)
+        assert messages[4][2] == ('Hello, {name1} and {name2}!', None, None)
+        assert messages[4][3] == ['NOTE: Second']
+        assert messages[5][2] == 'Heungsub'
         assert messages[5][3] == ['NOTE: Third']
-        assert messages[6][2] == 'Heungsub'
+        assert messages[6][2] == 'Armin'
         assert messages[6][3] == []
-        assert messages[7][2] == 'Armin'
-        assert messages[7][3] == []
+        assert messages[7][2] == ('Hello, {0} and {1}!', None, None)
+        assert messages[7][3] == ['NOTE: Third']
+        assert messages[8][2] == 'Person'
+        assert messages[8][3] == ['NOTE: Fourth']
+        assert messages[9][2] == ('Hello %(person)', None)
+        assert messages[9][3] == ['NOTE: Fourth']
+        assert messages[10][2] == 'Person 1'
+        assert messages[10][3] == []
+        assert messages[11][2] == 'Person 2'
+        assert messages[11][3] == []
+        assert messages[12][2] == ('Hello %(people)', None)
+        assert messages[12][3] == ['NOTE: Fifth']
 
 
 class ExtractTestCase(unittest.TestCase):