// Script logic
////////////////

// Check we have required options
if (empty($bookSlug) || empty($outFolder)) {
    errorOut("Both a book slug and output folder must be provided");
}

// Create the output folder if it does not exist.
// Stop here on failure; continuing would leave us without a valid target.
if (!is_dir($outFolder) && !mkdir($outFolder, 0777, true)) {
    errorOut("Could not create the output folder: {$outFolder}");
}

// Get full output directory.
// realpath() returns false when the path cannot be resolved, and false
// concatenated with "/index.html" would point at the filesystem root,
// so this must be checked before any file is written.
$outDir = realpath($outFolder);
if ($outDir === false) {
    errorOut("Could not resolve the output folder: {$outFolder}");
}

// Get the book details
$book = getBookBySlug($bookSlug);

// Error out if we don't have a book
if (is_null($book)) {
    errorOut("Could not find book with the URL slug: {$bookSlug}");
}

// Get all chapters and pages within the book
$chapters = getAllOfAtListEndpoint("api/chapters", ['filter[book_id]' => $book['id']]);
$pages = getAllOfAtListEndpoint("api/pages", ['filter[book_id]' => $book['id']]);

// Get the full content for each page
foreach ($pages as $index => $page) {
    $pages[$index] = apiGetJson("api/pages/{$page['id']}");
}

// Create the image output directory
$imageDir = $outDir . "/images";
if (!is_dir($imageDir) && !mkdir($imageDir, 0777, true)) {
    errorOut("Could not create the image output folder: {$imageDir}");
}

// Find the pages that are not within a chapter
$directBookPages = array_filter($pages, function ($page) {
    return empty($page['chapter_id']);
});

// Create book index file
$bookIndex = getBookHtmlOutput($book, $chapters, $directBookPages);
file_put_contents($outDir . "/index.html", $bookIndex);

// Create a HTML file for each chapter
// in addition to each page within those chapters
foreach ($chapters as $chapter) {
    $childPages = array_filter($pages, function ($page) use ($chapter) {
        // Tolerate a missing chapter_id the same way the direct-page filter does
        return ($page['chapter_id'] ?? null) == $chapter['id'];
    });
    $chapterPage = getChapterHtmlOutput($chapter, $childPages);
    file_put_contents($outDir . "/chapter-{$chapter['slug']}.html", $chapterPage);

    foreach ($childPages as $childPage) {
        $childPageContent = getPageHtmlOutput($childPage, $chapter);
        $childPageContent = extractImagesFromHtml($childPageContent);
        file_put_contents($outDir . "/page-{$childPage['slug']}.html", $childPageContent);
    }
}

// Create a file for each direct child book page
foreach ($directBookPages as $directPage) {
    $directPageContent = getPageHtmlOutput($directPage, null);
    $directPageContent = extractImagesFromHtml($directPageContent);
    file_put_contents($outDir . "/page-{$directPage['slug']}.html", $directPageContent);
}
7384
/**
 * Scan the given HTML for image URLs and extract those images
 * to save them locally and update the HTML references to point
 * to the local files.
 *
 * Images that cannot be fetched or written are left untouched in the HTML.
 * Saved file names are remembered for the lifetime of the script so two
 * different images sharing a base name never overwrite each other, and an
 * image used by several pages is only downloaded and stored once.
 *
 * @param string $html HTML content to scan for <img> tags.
 * @return string The HTML with each saved image URL replaced by "./images/<file>".
 */
function extractImagesFromHtml(string $html): string {
    global $outDir;
    // Local file names already written during this run (name => true)
    static $savedImages = [];
    // Source URL => local file name, so repeated images reuse one file
    static $savedUrls = [];

    $matches = [];
    preg_match_all('/<img.*?src=["\'](.*?)[\'"].*?>/i', $html, $matches);
    foreach (array_unique($matches[1] ?? []) as $url) {
        // An empty src has nothing to fetch, and inline data URIs are
        // already self-contained so they need no local copy.
        if ($url === '' || stripos($url, 'data:') === 0) {
            continue;
        }

        if (!isset($savedUrls[$url])) {
            $image = getImageFile($url);
            // A failed fetch can surface as false or, where a string return
            // type has coerced it, as an empty string. Skip both rather
            // than writing an empty file and pointing the HTML at it.
            if ($image === false || $image === '') {
                continue;
            }

            // Name the file after the URL path only: a query string kept in
            // the name would not resolve when used as a relative reference.
            $path = parse_url($url, PHP_URL_PATH);
            $name = basename(is_string($path) ? $path : $url);
            if ($name === '') {
                $name = 'image';
            }

            // Prefix with an increasing counter until the name is unused
            $fileName = $name;
            $count = 1;
            while (isset($savedImages[$fileName])) {
                $fileName = $count . '-' . $name;
                $count++;
            }

            // Only rewrite references once the local copy really exists
            if (file_put_contents($outDir . "/images/" . $fileName, $image) === false) {
                continue;
            }

            $savedImages[$fileName] = true;
            $savedUrls[$url] = $fileName;
        }

        $html = str_replace($url, "./images/" . $savedUrls[$url], $html);
    }
    return $html;
}
91115
/**
 * Get an image file from the given URL.
 * Checks if it's hosted on the same instance as the API we're
 * using so that auth details can be provided for BookStack images
 * in case local_secure images are in use.
 *
 * No return type is declared on purpose: a "string" return type would
 * turn a failed read (false) into an empty string, or a TypeError under
 * strict types, so callers could never detect the failure.
 *
 * @param string $url Absolute URL of the image to fetch.
 * @return string|false The raw image data, or false when it could not be fetched.
 */
function getImageFile(string $url) {
    global $apiUrl;

    // Nothing to fetch, and an empty path is rejected by file_get_contents()
    if ($url === '') {
        return false;
    }

    // Case-insensitive check that the URL starts with the API base URL
    if (stripos($url, $apiUrl) === 0) {
        $endpoint = substr($url, strlen($apiUrl));
        $data = apiGet($endpoint);
        // apiGet() is declared to return a string, so a failed request is
        // expected to arrive as an empty string; report both forms as false.
        return ($data === false || $data === '') ? false : $data;
    }

    // External image: errors are suppressed and signalled via the false return
    return @file_get_contents($url);
}
100130
101- function getBookContent (array $ book , array $ chapters , array $ pages ): string {
131+ /**
132+ * Get the HTML representation of a book.
133+ */
134+ function getBookHtmlOutput (array $ book , array $ chapters , array $ pages ): string {
102135 $ content = "<h1> {$ book ['name ' ]}</h1> " ;
103136 $ content .= "<p> {$ book ['description ' ]}</p> " ;
104137 $ content .= "<hr> " ;
@@ -119,7 +152,10 @@ function getBookContent(array $book, array $chapters, array $pages): string {
119152 return $ content ;
120153}
121154
122- function getChapterContent (array $ chapter , array $ pages ): string {
155+ /**
156+ * Get the HTML representation of a chapter.
157+ */
158+ function getChapterHtmlOutput (array $ chapter , array $ pages ): string {
123159 $ content = "<p><a href='./index.html'>Back to book</a></p> " ;
124160 $ content .= "<h1> {$ chapter ['name ' ]}</h1> " ;
125161 $ content .= "<p> {$ chapter ['description ' ]}</p> " ;
@@ -134,7 +170,10 @@ function getChapterContent(array $chapter, array $pages): string {
134170 return $ content ;
135171}
136172
137- function getPageContent (array $ page , ?array $ parentChapter ): string {
173+ /**
174+ * Get the HTML representation of a page.
175+ */
176+ function getPageHtmlOutput (array $ page , ?array $ parentChapter ): string {
138177 if (is_null ($ parentChapter )) {
139178 $ content = "<p><a href='./index.html'>Back to book</a></p> " ;
140179 } else {
@@ -189,7 +228,7 @@ function apiGet(string $endpoint): string {
189228 $ url = rtrim ($ apiUrl , '/ ' ) . '/ ' . ltrim ($ endpoint , '/ ' );
190229 $ opts = ['http ' => ['header ' => "Authorization: Token {$ clientId }: {$ clientSecret }" ]];
191230 $ context = stream_context_create ($ opts );
192- return file_get_contents ($ url , false , $ context );
231+ return @ file_get_contents ($ url , false , $ context );
193232}
194233
195234/**
@@ -211,6 +250,9 @@ function dd(...$args) {
211250 exit (1 );
212251}
213252
253+ /**
254+ * Alert of an error then exit the script.
255+ */
214256function errorOut (string $ text ) {
215257 echo "ERROR: " . $ text ;
216258 exit (1 );
0 commit comments