From 36af0104d8af10bf90053f4a24d61e6e7d3f174e Mon Sep 17 00:00:00 2001
From: trizen
Date: Sat, 30 Jul 2022 19:18:26 +0300
Subject: [PATCH] new file: Converters/code2pdf.pl

---
Review notes (this section is ignored by `git am`):
  * The line structure of the patch was restored.
  * code2pdf.pl: `md2html` is executed without a shell, the HTML tree is
    walked with content_list()/as_text(), and close() is checked.
  * gitbook2pdf.pl: the accidental change of the shebang line was dropped.
  * The commit ID and the blob IDs still refer to the original commit.

 Converters/code2pdf.pl    | 160 ++++++++++++++++++++++++++++++++++++++
 Converters/gitbook2pdf.pl |   2 +-
 README.md                 |   1 +
 3 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100755 Converters/code2pdf.pl
 mode change 100644 => 100755 Converters/gitbook2pdf.pl

diff --git a/Converters/code2pdf.pl b/Converters/code2pdf.pl
new file mode 100755
index 00000000..5945f79d
--- /dev/null
+++ b/Converters/code2pdf.pl
@@ -0,0 +1,160 @@
+#!/usr/bin/perl
+
+# Author: Trizen
+# Date: 30 July 2022
+# https://github.com/trizen
+
+# Code to PDF converter, with syntax highlighting, given a summary file.
+
+# Uses the following tools:
+#   md2html         -- for converting markdown to HTML
+#   markdown2pdf.pl -- for converting markdown to PDF (with syntax highlighting)
+
+use 5.010;
+use strict;
+use warnings;
+
+use open IO => ':utf8', ':std';
+use HTML::TreeBuilder 5 ('-weak');
+
+use Encode qw(decode_utf8 encode_utf8);
+use Getopt::Long qw(GetOptions);
+use URI::Escape qw(uri_unescape);
+use Digest::MD5 qw(md5_hex);
+
+my $markdown2pdf = "markdown2pdf.pl";    # path to the `markdown2pdf.pl` script
+
+my $style = 'github';
+my $title = 'Document';
+my $lang  = 'perl';
+
+# Print the usage message and exit with the given exit code (default: 0).
+sub usage {
+    my ($exit_code) = @_;
+    $exit_code //= 0;
+
+    print <<"EOT";
+usage: $0 [options] [SUMMARY.md] [output.pdf]
+
+options:
+
+    --style=s  : style theme for `highlight` (default: $style)
+    --title=s  : title of the PDF file (default: $title)
+    --lang=s   : language code used for highlighting (default: $lang)
+
+EOT
+
+    exit($exit_code);
+}
+
+GetOptions(
+           "style=s" => \$style,
+           "title=s" => \$title,
+           "lang=s"  => \$lang,
+           "h|help"  => sub { usage(0) },
+          )
+  or die("Error in command line arguments\n");
+
+my $input_markdown_file = $ARGV[0] // usage(2);
+my $output_pdf_file     = $ARGV[1] // "OUTPUT.pdf";
+
+say ":: Converting $input_markdown_file to HTML...";
+
+# Run `md2html` through a list-form pipe, so that the file name is never
+# interpreted by a shell (spaces and metacharacters are passed verbatim).
+open(my $md2html_fh, '-|', 'md2html', $input_markdown_file)
+  or die "Can't execute `md2html`: $!";
+my $html = do { local $/; <$md2html_fh> };
+close($md2html_fh);    # waits for `md2html` and sets $?
+
+if ($? != 0) {
+    die "`md2html` failed with code: $?";
+}
+
+my $tree = HTML::TreeBuilder->new();
+$tree->parse($html);
+$tree->eof();
+
+#my @nodes = $tree->guts();
+my @nodes = $tree->disembowel();
+
+say ":: Reading Markdown files...";
+my $markdown_content = '';
+
+# Recursively walk an <ul> element of the summary, appending to
+# $markdown_content a heading (with $depth `#` characters) for each entry.
+# For each link to an existing file, the content of the file is also
+# appended, inside a fenced code block tagged with $lang.
+sub expand_ul {
+    my ($ul, $depth) = @_;
+
+    foreach my $t ($ul->content_list) {
+        if (ref($t) and $t->tag eq 'li') {
+            foreach my $x ($t->content_list) {
+
+                if (!ref($x)) {
+                    $markdown_content .= ("#" x $depth) . ' ' . $x . "\n\n";
+                    next;
+                }
+
+                if ($x->tag eq 'ul') {
+                    expand_ul($x, $depth + 1);
+                }
+                else {
+                    if ($x->tag eq 'a') {
+
+                        my $href = $x->attr('href');
+                        my $file = decode_utf8(uri_unescape($href));
+
+                        if (not -e $file) {
+                            warn ":: File <<$file>> does not exist. Skipping...\n";
+                            next;
+                        }
+
+                        if (open my $fh, '<:utf8', $file) {
+                            local $/;
+
+                            # as_text() also handles link labels wrapped in
+                            # inline markup (e.g.: <a><code>name</code></a>).
+                            $markdown_content .= ("#" x $depth) . ' ' . $x->as_text . "\n\n";
+                            $markdown_content .= "```$lang\n";
+                            $markdown_content .= <$fh>;
+                            if (substr($markdown_content, -1) ne "\n") {
+                                $markdown_content .= "\n";
+                            }
+                            $markdown_content .= "```\n\n";
+                        }
+                        else {
+                            warn ":: Cannot open file <<$file>> for reading: $!\n";
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+foreach my $entry (@nodes) {
+    if (ref($entry) and $entry->tag eq 'ul') {
+        expand_ul($entry, 1);
+    }
+}
+
+my $markdown_file = "$output_pdf_file.md";
+
+open my $fh, '>:utf8', $markdown_file
+  or die "Can't open file <<$markdown_file>> for writing: $!";
+
+print $fh $markdown_content;
+close($fh)
+  or die "Can't write to file <<$markdown_file>>: $!";
+
+say ":: Converting Markdown to PDF...";
+my $exit_status = system($markdown2pdf, "--style", $style, "--title", $title, $markdown_file, $output_pdf_file);
+
+# Remove the temporary Markdown file, even when the conversion failed.
+unlink($markdown_file);
+
+if ($exit_status != 0) {
+    die "`$markdown2pdf` failed with code: $exit_status";
+}
diff --git a/Converters/gitbook2pdf.pl b/Converters/gitbook2pdf.pl
old mode 100644
new mode 100755
index d5d2b032..6203f623
--- a/Converters/gitbook2pdf.pl
+++ b/Converters/gitbook2pdf.pl
@@ -113,7 +113,7 @@ sub expand_ul {
 
 foreach my $entry (@nodes) {
     if ($entry->tag eq 'ul') {
-        expand_ul($entry, 0);
+        expand_ul($entry, 1);
     }
 }
 
diff --git a/README.md b/README.md
index b44fd722..aa6a14df 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ A nice collection of day-to-day Perl scripts.
 * Converters
     * [Any to 3gp](./Converters/any_to_3gp.pl)
     * [Ass2srt](./Converters/ass2srt.pl)
+    * [Code2pdf](./Converters/code2pdf.pl)
     * [Euler2pdf](./Converters/euler2pdf.pl)
     * [From hex](./Converters/from_hex.pl)
     * [Gitbook2pdf](./Converters/gitbook2pdf.pl)