Skip to content

Commit

Permalink
Added support for escape character in markdown text (#1224)
Browse files Browse the repository at this point in the history
* Added support for escape character in markdown text

* updated documentation

* Fixed escape character handling & added more tests

* rephrased docu

* updated documentation

---------

Co-authored-by: KingOfKaste <[email protected]>
  • Loading branch information
david-fed and david-fed authored Jul 20, 2024
1 parent 7c91959 commit 5ffab70
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',

## [2.7.10] - Not released yet
### Added
* support for escape character for markers in markdown text [issue #1215](https://github.com/py-pdf/fpdf2/issues/1215)
* Wrapping words on spaces now considers all common space symbols in addition to regular spaces (' '), addressing issues with word-wrapping for languages like Thai, as per [#1190](https://github.com/py-pdf/fpdf2/issues/1190) and [#1191](https://github.com/py-pdf/fpdf2/pull/1191).
* [`Templates`](https://py-pdf.github.io/fpdf2/fpdf/Templates.html) can now also be defined in JSON files.
* support to optionally set `wrapmode` in templates (default `"WORD"` can optionally be set to `"CHAR"` to support wrapping on characters for scripts like Chinese or Japanese) - _cf._ [#1159](https://github.com/py-pdf/fpdf2/issues/1159) - thanks to @carlhiggs
Expand Down
11 changes: 8 additions & 3 deletions docs/TextStyling.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,25 @@ An optional `markdown=True` parameter can be passed to the [`cell()`](fpdf/fpdf.
& [`multi_cell()`](fpdf/fpdf.html#fpdf.fpdf.FPDF.multi_cell) methods
in order to enable basic Markdown-like styling: `**bold**, __italics__, --underlined--`.

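If the printable text contains a character sequence that would otherwise be interpreted as a formatting marker, it can be escaped by prefixing it with a backslash (`\`). To print a literal backslash directly before a marker that should still apply its styling, double the backslash (`\\`). Keep in mind that in a regular (non-raw) Python string literal each backslash must itself be written as `\\` (see the example below).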

Bold & italics require using dedicated fonts for each style.

For the standard fonts (Courier, Helvetica & Times), those dedicated fonts are configured by default:

```python
from fpdf import FPDF

pdf = FPDF()
pdf.add_page()
pdf.set_font("Times", size=60)
pdf.cell(text="**Lorem** __Ipsum__ --dolor--", markdown=True)
pdf.set_font("Times", size=50)
pdf.cell(text="**Lorem** __Ipsum__ --dolor--", markdown=True, new_x='LEFT', new_y='NEXT')
pdf.cell(text="\\**Lorem\\** \\\\__Ipsum\\\\__ --dolor--", markdown=True)
pdf.output("markdown-styled.pdf")
```

![](markdown-style.png)

Using other fonts means that their variants (bold, italics)
must be registered using `add_font` with `style="B"` and `style="I"`.
Several unit tests in `test/text/` demonstrate that:
Expand Down
Binary file added docs/markdown-style.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 25 additions & 12 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ class FPDF(GraphicsStateMixin, TextRegionMixin):
MARKDOWN_BOLD_MARKER = "**"
MARKDOWN_ITALICS_MARKER = "__"
MARKDOWN_UNDERLINE_MARKER = "--"
MARKDOWN_ESCAPE_CHARACTER = "\\"
MARKDOWN_LINK_REGEX = re.compile(r"^\[([^][]+)\]\(([^()]+)\)(.*)$", re.DOTALL)
MARKDOWN_LINK_COLOR = None
MARKDOWN_LINK_UNDERLINE = True
Expand Down Expand Up @@ -2936,7 +2937,7 @@ def cell(
(identifier returned by `FPDF.add_link`) or external URL.
center (bool): center the cell horizontally on the page.
markdown (bool): enable minimal markdown-like markup to render part
of text as bold / italics / underlined. Default to False.
of text as bold / italics / underlined. Supports `\\` as escape character. Default to False.
txt (str): [**DEPRECATED since v2.7.6**] String to print. Default value: empty string.
Returns: a boolean indicating if page break was triggered
Expand Down Expand Up @@ -3455,6 +3456,7 @@ def frag():
font_glyphs = self.current_font.cmap
else:
font_glyphs = []
num_escape_chars = 0

while text:
is_marker = text[:2] in (
Expand All @@ -3480,16 +3482,27 @@ def frag():
and (not txt_frag or txt_frag[-1] != half_marker)
and (len(text) < 3 or text[2] != half_marker)
):
if txt_frag:
yield frag()
if text[:2] == self.MARKDOWN_BOLD_MARKER:
in_bold = not in_bold
if text[:2] == self.MARKDOWN_ITALICS_MARKER:
in_italics = not in_italics
if text[:2] == self.MARKDOWN_UNDERLINE_MARKER:
in_underline = not in_underline
text = text[2:]
continue
txt_frag = (
txt_frag[: -((num_escape_chars + 1) // 2)]
if num_escape_chars > 0
else txt_frag
)
if num_escape_chars % 2 == 0:
if txt_frag:
yield frag()
if text[:2] == self.MARKDOWN_BOLD_MARKER:
in_bold = not in_bold
if text[:2] == self.MARKDOWN_ITALICS_MARKER:
in_italics = not in_italics
if text[:2] == self.MARKDOWN_UNDERLINE_MARKER:
in_underline = not in_underline
text = text[2:]
continue
num_escape_chars = (
num_escape_chars + 1
if text[0] == self.MARKDOWN_ESCAPE_CHARACTER
else 0
)
is_link = self.MARKDOWN_LINK_REGEX.match(text)
if is_link:
link_text, link_dest, text = is_link.groups()
Expand Down Expand Up @@ -3673,7 +3686,7 @@ def multi_cell(
ln (int): **DEPRECATED since 2.5.1**: Use `new_x` and `new_y` instead.
max_line_height (float): optional maximum height of each sub-cell generated
markdown (bool): enable minimal markdown-like markup to render part
of text as bold / italics / underlined. Default to False.
of text as bold / italics / underlined. Supports `\\` as escape character. Default to False.
print_sh (bool): Treat a soft-hyphen (\\u00ad) as a normal printable
character, instead of a line breaking opportunity. Default value: False
wrapmode (fpdf.enums.WrapMode): "WORD" for word based line wrapping (default),
Expand Down
Binary file added test/text/cell_markdown_bold_italic_escaped.pdf
Binary file not shown.
Binary file added test/text/cell_markdown_escaped.pdf
Binary file not shown.
Binary file not shown.
Binary file added test/text/multi_cell_markdown_escaped.pdf
Binary file not shown.
Binary file not shown.
27 changes: 27 additions & 0 deletions test/text/test_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,14 @@ def test_cell_markdown(tmp_path):
assert_pdf_equal(pdf, HERE / "cell_markdown.pdf", tmp_path)


def test_cell_markdown_escaped(tmp_path):
    """Render one markdown cell containing backslashes and compare with the reference PDF.

    The text combines a backslash that precedes no marker, single-escaped
    italics markers and double-escaped underline markers.
    """
    escaped_text = "**Lo\\rem** \\__Ipsum\\__ \\\\--dolor\\\\--"
    doc = FPDF()
    doc.add_page()
    doc.set_font("Times", size=40)
    doc.cell(text=escaped_text, markdown=True)
    reference = HERE / "cell_markdown_escaped.pdf"
    assert_pdf_equal(doc, reference, tmp_path)


def test_cell_markdown_bold_italic(tmp_path):
# issue 1094
pdf = FPDF()
Expand All @@ -182,6 +190,14 @@ def test_cell_markdown_bold_italic(tmp_path):
assert_pdf_equal(pdf, HERE / "cell_markdown_bold_italic.pdf", tmp_path)


def test_cell_markdown_bold_italic_escaped(tmp_path):
    """Render escaped underline markers nested inside bold + italics markers.

    The output is compared against the stored reference PDF.
    """
    nested_text = "**__Lorem \\--Ipsum\\--__**"
    doc = FPDF()
    doc.add_page()
    doc.set_font("Times", size=40)
    doc.cell(text=nested_text, markdown=True)
    reference = HERE / "cell_markdown_bold_italic_escaped.pdf"
    assert_pdf_equal(doc, reference, tmp_path)


def test_cell_markdown_with_ttf_fonts(tmp_path):
pdf = FPDF()
pdf.add_page()
Expand All @@ -193,6 +209,17 @@ def test_cell_markdown_with_ttf_fonts(tmp_path):
assert_pdf_equal(pdf, HERE / "cell_markdown_with_ttf_fonts.pdf", tmp_path)


def test_cell_markdown_with_ttf_fonts_escaped(tmp_path):
    """Render the escaped-markdown cell with the Roboto TTF family.

    Regular, bold and italic variants are registered before the cell is
    drawn; the result is compared against the stored reference PDF.
    """
    doc = FPDF()
    doc.add_page()
    # Register every style variant the markdown text may switch to.
    for style, font_file in (
        ("", "Roboto-Regular.ttf"),
        ("B", "Roboto-Bold.ttf"),
        ("I", "Roboto-Italic.ttf"),
    ):
        doc.add_font("Roboto", style, FONTS_DIR / font_file)
    doc.set_font("Roboto", size=40)
    doc.cell(text="**Lo\\rem** \\__Ipsum\\__ \\\\--dolor\\\\--", markdown=True)
    reference = HERE / "cell_markdown_with_ttf_fonts_escaped.pdf"
    assert_pdf_equal(doc, reference, tmp_path)


def test_cell_markdown_missing_ttf_font():
pdf = FPDF()
pdf.add_page()
Expand Down
89 changes: 89 additions & 0 deletions test/text/test_markdown_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,62 @@ def test_markdown_parse_simple_ok():
assert frags == expected


def test_markdown_parse_simple_ok_escaped():
    """Check how `_parse_chars` treats backslashes placed around markdown markers.

    Four scenarios are covered:
    * every marker escaped, written as a regular string literal;
    * the same text written as a raw string literal;
    * a backslash splitting the two characters of a marker;
    * a backslash placed right after a marker.
    """
    # Escaped markers are emitted literally and do not toggle any style.
    frags = tuple(
        FPDF()._parse_chars(
            "\\**bold\\**, \\__italics\\__ and \\--underlined\\-- escaped", True
        )
    )
    expected = (
        Fragment("**bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k),
    )
    assert frags == expected

    # Same input expressed as a raw string: one backslash per marker.
    frags = tuple(
        FPDF()._parse_chars(
            r"raw \**bold\**, \__italics\__ and \--underlined\-- escaped", True
        )
    )
    expected = (
        Fragment(
            "raw **bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k
        ),
    )
    assert frags == expected

    # A backslash between the two marker characters leaves the text untouched.
    frags = tuple(FPDF()._parse_chars("escape *\\*between marker*\\*", True))
    expected = (Fragment("escape *\\*between marker*\\*", GSTATE, k=PDF.k),)
    assert frags == expected

    # A backslash following a marker escapes nothing: the marker still toggles
    # bold and the backslash is kept as ordinary text.
    frags = tuple(FPDF()._parse_chars("escape **\\after marker**\\", True))
    expected = (
        Fragment("escape ", GSTATE, k=PDF.k),
        Fragment("\\after marker", GSTATE_B, k=PDF.k),
        Fragment("\\", GSTATE, k=PDF.k),
    )
    # Without this comparison the scenario above would never be verified.
    assert frags == expected


def test_markdown_unrelated_escape():
    """Backslashes that do not precede a marker must be kept verbatim.

    Each case pairs the markdown input with the single unstyled fragment
    text expected from `_parse_chars`.
    """
    cases = (
        (
            "unrelated \\ escape \\**bold\\**",
            "unrelated \\ escape **bold**",
        ),
        (
            "unrelated \\\\ double escape \\**bold\\**",
            "unrelated \\\\ double escape **bold**",
        ),
    )
    for markdown_text, rendered_text in cases:
        parsed = tuple(FPDF()._parse_chars(markdown_text, True))
        assert parsed == (Fragment(rendered_text, GSTATE, k=PDF.k),)


def test_markdown_parse_multiple_escape():
    """Check runs of two and three backslashes placed before a marker."""
    # Two backslashes: one literal backslash is emitted and the marker
    # still switches bold on/off.
    double_escaped = tuple(
        FPDF()._parse_chars("\\\\**bold\\\\** double escaped", True)
    )
    assert double_escaped == (
        Fragment("\\", GSTATE, k=PDF.k),
        Fragment("bold\\", GSTATE_B, k=PDF.k),
        Fragment(" double escaped", GSTATE, k=PDF.k),
    )

    # Three backslashes: one literal backslash followed by a literal marker.
    triple_escaped = tuple(
        FPDF()._parse_chars("\\\\\\**triple bold\\\\\\** escaped", True)
    )
    assert triple_escaped == (
        Fragment("\\**triple bold\\** escaped", GSTATE, k=PDF.k),
    )


def test_markdown_parse_overlapping():
frags = tuple(FPDF()._parse_chars("**bold __italics__**", True))
expected = (
Expand All @@ -35,6 +91,12 @@ def test_markdown_parse_overlapping():
assert frags == expected


def test_markdown_parse_overlapping_escaped():
    """Escaped italics markers inside a bold span stay literal within one bold fragment."""
    parsed = tuple(FPDF()._parse_chars("**bold \\__italics\\__**", True))
    bold_only = Fragment("bold __italics__", GSTATE_B, k=PDF.k)
    assert parsed == (bold_only,)


def test_markdown_parse_crossing_markers():
frags = tuple(FPDF()._parse_chars("**bold __and** italics__", True))
expected = (
Expand All @@ -45,6 +107,15 @@ def test_markdown_parse_crossing_markers():
assert frags == expected


def test_markdown_parse_crossing_markers_escaped():
    """An escaped closing bold marker is kept as text, so bold stays active through the italics span."""
    parsed = tuple(FPDF()._parse_chars("**bold __and\\** italics__", True))
    bold_part = Fragment("bold ", GSTATE_B, k=PDF.k)
    bold_italic_part = Fragment("and** italics", GSTATE_BI, k=PDF.k)
    assert parsed == (bold_part, bold_italic_part)


def test_markdown_parse_unterminated():
frags = tuple(FPDF()._parse_chars("**bold __italics__", True))
expected = (
Expand All @@ -54,6 +125,15 @@ def test_markdown_parse_unterminated():
assert frags == expected


def test_markdown_parse_unterminated_escaped():
    """Escaping the closing bold marker leaves bold unterminated, so the italics span is bold + italics."""
    parsed = tuple(FPDF()._parse_chars("**bold\\** __italics__", True))
    bold_part = Fragment("bold** ", GSTATE_B, k=PDF.k)
    bold_italic_part = Fragment("italics", GSTATE_BI, k=PDF.k)
    assert parsed == (bold_part, bold_italic_part)


def test_markdown_parse_line_of_markers():
frags = tuple(FPDF()._parse_chars("*** woops", True))
expected = (Fragment("*** woops", GSTATE, k=PDF.k),)
Expand All @@ -72,6 +152,15 @@ def test_markdown_parse_line_of_markers():
assert frags == expected


def test_markdown_parse_line_of_markers_escaped():
    """Check backslashes mixed into a run of more than two marker characters.

    Each case pairs the input with the single unstyled fragment text that
    `_parse_chars` is expected to produce.
    """
    cases = (
        ("\\****BOLD**", "\\****BOLD"),
        ("*\\***BOLD**", "*\\***BOLD"),
    )
    for markdown_text, rendered_text in cases:
        parsed = tuple(FPDF()._parse_chars(markdown_text, True))
        assert parsed == (Fragment(rendered_text, GSTATE, k=PDF.k),)


def test_markdown_parse_newline_after_markdown_link(): # issue 916
text = "[fpdf2](https://py-pdf.github.io/fpdf2/)\nGo visit it!"
frags = tuple(FPDF()._parse_chars(text, True))
Expand Down
37 changes: 37 additions & 0 deletions test/text/test_multi_cell_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ def test_multi_cell_markdown(tmp_path):
assert_pdf_equal(pdf, HERE / "multi_cell_markdown.pdf", tmp_path)


def test_multi_cell_markdown_escaped(tmp_path):
    """Render escaped markdown with `multi_cell` twice and compare with the reference PDF.

    The paragraph is drawn once with the default alignment and once with
    `align="L"`, separated by a line break.
    """
    doc = fpdf.FPDF()
    doc.add_page()
    doc.set_font("Times", "", 32)
    # Some text where styling occurs over line breaks:
    paragraph = (
        "Lorem ipsum \\ dolor amet, \\**consectetur adipiscing\\** elit,"
        " sed do eiusmod \\\\__tempor incididunt\\\\__ ut labore et dolore --magna aliqua--."
    )
    # This is tricky to get working well
    doc.multi_cell(w=doc.epw, text=paragraph, markdown=True)
    doc.ln()
    doc.multi_cell(w=doc.epw, text=paragraph, markdown=True, align="L")
    reference = HERE / "multi_cell_markdown_escaped.pdf"
    assert_pdf_equal(doc, reference, tmp_path)


def test_multi_cell_markdown_with_ttf_fonts(tmp_path):
pdf = fpdf.FPDF()
pdf.add_page()
Expand All @@ -44,6 +60,27 @@ def test_multi_cell_markdown_with_ttf_fonts(tmp_path):
assert_pdf_equal(pdf, HERE / "multi_cell_markdown_with_ttf_fonts.pdf", tmp_path)


def test_multi_cell_markdown_with_ttf_fonts_escaped(tmp_path):
    """Render escaped markdown with `multi_cell` using the Roboto TTF family.

    Regular, bold and italic variants are registered first. The paragraph is
    drawn once with the default alignment and once with `align="L"`,
    separated by a line break, then compared against the reference PDF.
    """
    doc = fpdf.FPDF()
    doc.add_page()
    # Register every style variant the markdown text may switch to.
    for style, font_file in (
        ("", "Roboto-Regular.ttf"),
        ("B", "Roboto-Bold.ttf"),
        ("I", "Roboto-Italic.ttf"),
    ):
        doc.add_font("Roboto", style, FONTS_DIR / font_file)
    doc.set_font("Roboto", size=32)
    # Some text where styling occurs over line breaks:
    paragraph = (
        "Lorem ipsum \\ dolor, \\**consectetur adipiscing\\** elit,"
        " eiusmod \\\\__tempor incididunt\\\\__ ut labore et dolore --magna aliqua--."
    )
    # This is tricky to get working well
    doc.multi_cell(w=doc.epw, text=paragraph, markdown=True)
    doc.ln()
    doc.multi_cell(w=doc.epw, text=paragraph, markdown=True, align="L")
    reference = HERE / "multi_cell_markdown_with_ttf_fonts_escaped.pdf"
    assert_pdf_equal(doc, reference, tmp_path)


def test_multi_cell_markdown_missing_ttf_font():
pdf = fpdf.FPDF()
pdf.add_page()
Expand Down

0 comments on commit 5ffab70

Please sign in to comment.