-
Notifications
You must be signed in to change notification settings - Fork 0
/
dlfromopf.php
112 lines (96 loc) · 5.31 KB
/
dlfromopf.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
<?php
require 'vendor/autoload.php';
ini_set("memory_limit", "512M");
use TheCodingMachine\Gotenberg\Request;
use TheCodingMachine\Gotenberg\Client;
use TheCodingMachine\Gotenberg\DocumentFactory;
use TheCodingMachine\Gotenberg\HTMLRequest;
use TheCodingMachine\Gotenberg\MergeRequest;
// Gotenberg API client; a Gotenberg instance is expected at http://localhost:3000.
$client = new Client('http://localhost:3000', new \Http\Adapter\Guzzle6\Client());

// Stream context shared by every file_get_contents() download in this script.
// NOTE(review): TLS peer verification is switched off here, so server
// certificates are not checked — confirm this is intended for the target host.
define('CONTEXT', stream_context_create(['ssl' => ['verify_peer' => false, 'verify_peer_name' => false]]));

// Working directory: receives the per-page PDFs, the merged PDF and params.json.
// An empty answer selects the default shown in the prompt.
$saveDir = readline('Dossier de sauvegarde : (./manuel/) ') ?: './manuel/';
if (substr($saveDir, -1) !== '/') {
    $saveDir .= '/';
}

$useJson = false;
if (!is_dir($saveDir)) {
    // Octal 0777 (still filtered by the umask). The former literal 077 meant
    // ---rwxrwx, i.e. a directory its own owner could not enter or write to.
    if (!mkdir($saveDir, 0777, true) && !is_dir($saveDir)) {
        print("Impossible de créer le dossier `" . $saveDir . "`, arrêt.\n");
        exit(1);
    }
} elseif (file_exists($saveDir . 'params.json')) {
    // A previous run left its parameters behind: offer to reuse them.
    $params = json_decode(file_get_contents($saveDir . 'params.json'), true);
    $answer = readline("Fichier de paramètres trouvé, l'utiliser ? (O)");
    // The cast keeps "no input at all" (readline() returning false) equivalent
    // to an empty answer, which means "yes".
    $useJson = in_array((string) $answer, ['', 'O', 'o'], true);
}

// Saved parameters are only usable when the file decoded to an array holding
// all three expected keys; otherwise fall back to asking the user.
if ($useJson && isset($params['OPF'], $params['pageName'], $params['finaleFilename'])) {
    define('OPF', $params['OPF']);
    $pageName = $params['pageName'];
    $finaleFilename = $params['finaleFilename'];
} else {
    $defaultOpf = 'https://biblio.manuel-numerique.com/epubs/NATHAN/bibliomanuels/distrib_gp/2/1/10403/online/OEBPS/content.opf';
    // OPF: URL of the package document whose manifest lists the pages.
    define('OPF', readline('URL de du .opf : (' . $defaultOpf . ') ') ?: $defaultOpf);
    // $pageName: sscanf()/sprintf() pattern that page file names must match.
    $pageName = readline('Nom fichier page : (Page_%05d.xhtml) ') ?: 'Page_%05d.xhtml';
    // $finaleFilename: name of the merged PDF written into $saveDir.
    $finaleFilename = readline('Nom du fichier final : (manuel.pdf) ') ?: 'manuel.pdf';
    // Remember the answers so that the next run can reuse them.
    file_put_contents($saveDir . 'params.json', json_encode(['OPF' => OPF, 'pageName' => $pageName, 'finaleFilename' => $finaleFilename]));
}
// Download the OPF package document and stop cleanly when it cannot be
// fetched, instead of letting SimpleXMLElement throw on an empty payload.
$opfFile = file_get_contents(OPF, false, CONTEXT);
if (!$opfFile) {
    print("Erreur OPF, arrêt.\n");
    exit(1);
}
$xml = new SimpleXMLElement($opfFile);

// Collect the href of every manifest item whose file name round-trips through
// the $pageName pattern: the value parsed by sscanf() is formatted back with
// sprintf() and must reproduce the name exactly.
$pages = [];
foreach ($xml->manifest->item as $item) {
    $href = strval($item['href']);
    $segments = explode('/', $href);
    $name = array_pop($segments);
    $parsed = sscanf($name, $pageName);
    // sscanf() may yield no usable first value; `??` then supplies null
    // without raising a notice, and the comparison below decides the outcome.
    $number = $parsed[0] ?? null;
    if ($name === sprintf($pageName, $number)) {
        $pages[] = $href;
    }
}

// BASE_URL: the OPF URL without its last path segment, with a trailing slash
//           (manifest hrefs are appended to it).
// ROOT_URL: the first three '/'-separated parts of the OPF URL, i.e.
//           "scheme://host" for an ordinary http(s) URL (used for references
//           that start with '/').
$opfSegments = explode('/', OPF);
define('BASE_URL', implode('/', array_slice($opfSegments, 0, -1)) . '/');
define('ROOT_URL', implode('/', array_slice($opfSegments, 0, 3)));
/**
 * Turn a reference found in a page or stylesheet into an absolute URL.
 *
 * @param string $ref     Non-empty reference as written in the document.
 * @param string $baseDir Absolute URL (with trailing slash) that relative
 *                        references are resolved against.
 * @return string Absolute URL.
 */
$resolveUrl = function ($ref, $baseDir) {
    // Already absolute: use as is.
    if (preg_match('#^https?://#i', $ref) === 1) {
        return $ref;
    }
    // Host-relative references hang off the site root, others off $baseDir.
    return ($ref[0] === '/' ? ROOT_URL : $baseDir) . $ref;
};

/**
 * Download a referenced resource and return it as a base64 data URI.
 *
 * @param string      $ref     Reference as written in the document.
 * @param string      $baseDir Base URL for relative references.
 * @param string|null $mime    MIME type to advertise; when null an image type
 *                             is derived from the file extension.
 * @return string|null The data URI, or null when the reference is already a
 *                     data URI or the download failed (callers then leave the
 *                     original markup untouched).
 */
$inlineAsDataUri = function ($ref, $baseDir, $mime = null) use ($resolveUrl) {
    if (strncmp($ref, 'data:', 5) === 0) {
        return null;
    }
    $bytes = file_get_contents($resolveUrl($ref, $baseDir), false, CONTEXT);
    if ($bytes === false) {
        return null;
    }
    if ($mime === null) {
        // Ignore any query string when looking for the extension.
        $path = parse_url($ref, PHP_URL_PATH);
        if (!is_string($path)) {
            $path = $ref;
        }
        $ext = strtolower(pathinfo($path, PATHINFO_EXTENSION));
        // Extensions whose registered MIME subtype differs from the extension.
        $aliases = ['jpg' => 'jpeg', 'svg' => 'svg+xml'];
        $mime = 'image/' . ($aliases[$ext] ?? $ext);
    }
    return 'data:' . $mime . ';base64,' . base64_encode($bytes);
};

// Convert every page to its own PDF inside $saveDir and collect the resulting
// documents in $files for the final merge. Pages whose PDF already exists are
// reused, which lets an interrupted run resume.
$files = [];
foreach ($pages as $pageUrl) {
    $pdfName = urlencode($pageUrl) . '.pdf';
    $pdfPath = $saveDir . $pdfName;

    if (file_exists($pdfPath)) {
        print("Page $pageUrl déjà téléchargée...\n");
        $files[] = DocumentFactory::makeFromPath($pdfName, $pdfPath);
        continue;
    }

    print("Téléchargement de la page $pageUrl...\n");

    // Directory of the page; base for the relative references it contains.
    // A page sitting directly next to the OPF has no directory part, in which
    // case BASE_URL is used unchanged (no doubled slash).
    $pageDir = implode('/', array_slice(explode('/', $pageUrl), 0, -1));
    $currentUrlPath = BASE_URL . ($pageDir === '' ? '' : $pageDir . '/');

    $html = file_get_contents(BASE_URL . $pageUrl, false, CONTEXT);
    if (!$html) {
        print("Erreur page, arrêt.\n");
        exit(1);
    }

    // Inline CSS background images so the page no longer needs the network.
    $html = preg_replace_callback('/background-image:(.*)url\(\'(.+)\'\);/mU', function ($m) use ($currentUrlPath, $inlineAsDataUri) {
        $dataUri = $inlineAsDataUri($m[2], $currentUrlPath);
        if ($dataUri === null) {
            return $m[0];
        }
        return 'background-image:' . $m[1] . 'url(\'' . $dataUri . '\');';
    }, $html);

    // Same treatment for <img src="...">.
    $html = preg_replace_callback('/<img(.+)src="(.+)"(.*)>/mU', function ($m) use ($currentUrlPath, $inlineAsDataUri) {
        $dataUri = $inlineAsDataUri($m[2], $currentUrlPath);
        if ($dataUri === null) {
            return $m[0];
        }
        return '<img' . $m[1] . 'src="' . $dataUri . '"' . $m[3] . '>';
    }, $html);

    // Every stylesheet referenced by the page is downloaded, patched and sent
    // along with the page as a Gotenberg asset named after its href.
    preg_match_all('/href="(.+\.css)"/mU', $html, $cssMatches);
    $assets = array_map(function ($cssHref) use ($currentUrlPath, $resolveUrl, $inlineAsDataUri) {
        $cssUrl = $resolveUrl($cssHref, $currentUrlPath);
        $css = file_get_contents($cssUrl, false, CONTEXT);
        if ($css === false) {
            // Keep going with an empty stylesheet rather than aborting the page.
            $css = '';
        }

        // Relative URLs inside a stylesheet are relative to the stylesheet
        // itself, not to the page that links it.
        $cssDir = implode('/', array_slice(explode('/', $cssUrl), 0, -1)) . '/';

        // Embed TrueType fonts. The declaration is rebuilt in full ("src:" and
        // the closing ';' included) so that it stays valid CSS.
        $css = preg_replace_callback('/src:(.*)url\(\'(.+\.ttf)\'\);/m', function ($m) use ($cssDir, $inlineAsDataUri) {
            $dataUri = $inlineAsDataUri($m[2], $cssDir, 'font/ttf');
            if ($dataUri === null) {
                return $m[0];
            }
            return 'src:' . $m[1] . 'url(' . $dataUri . ');';
        }, $css);

        // Blank out every line that mentions "animation".
        $css = preg_replace('/^.*animation.*$/m', '', $css);

        return DocumentFactory::makeFromString($cssHref, $css);
    }, $cssMatches[1]);

    // Pixel size of the page, read from the inline style of <body>.
    $hasDimensions = preg_match('/<body.+style=".*width:\D*(\d+)px\D*height:\D*(\d+)\D*".*>/mU', $html, $dimensions) === 1;

    print("Convertion et téléchargement de la page sous `" . $pdfPath . "`...\n");

    $index = DocumentFactory::makeFromString('index.html', $html);
    $request = new HTMLRequest($index);
    $request->setAssets($assets);
    $request->setMargins(Request::NO_MARGINS);
    if ($hasDimensions) {
        // Pixels divided by 96 — presumably CSS px to inches, the unit the
        // paper-size setters are assumed to expect (confirm against the client).
        $request->setPaperWidth($dimensions[1] / 96);
        $request->setPaperHeight($dimensions[2] / 96);
    } else {
        // Without a size the request keeps the client's default paper format
        // instead of being sent with a zero-sized page.
        print("Dimensions de la page introuvables, format par défaut utilisé.\n");
    }
    $request->setScale(1);
    $request->setPageRanges('1');
    $request->setWaitTimeout(30);

    $client->store($request, $pdfPath);
    $files[] = DocumentFactory::makeFromPath($pdfName, $pdfPath);
}
// Final step: send every collected page document to Gotenberg in a single
// merge request and store the result as $saveDir . $finaleFilename.
echo "Fin du manuel, fusion des pages...\n";

$mergeRequest = new MergeRequest($files);
$mergeRequest->setWaitTimeout(120);

$mergedPdfPath = $saveDir . $finaleFilename;
$client->store($mergeRequest, $mergedPdfPath);

echo "Fin.";