1111import argparse
1212import subprocess
1313import shutil
14+ import lxml .html
1415
1516
1617def main ():
@@ -210,13 +211,14 @@ def convert_file(dst_path, fn, editors):
210211
211212 """
212213 print (fn )
213- subprocess .check_call (['jupyter' , 'nbconvert' , '--to' , 'rst ' ,
214+ subprocess .check_call (['jupyter' , 'nbconvert' , '--to' , 'html ' ,
214215 '--output-dir' , os .path .abspath (dst_path ),
215216 os .path .abspath (fn )],
216217 cwd = dst_path , stderr = subprocess .STDOUT )
217218
218219 basename = os .path .splitext (os .path .basename (fn ))[0 ]
219220 rst_fn = os .path .join (dst_path , basename + '.rst' )
221+ html_fn = os .path .join (dst_path , basename + '.html' )
220222
221223 title = None
222224 tags = set ()
@@ -225,59 +227,70 @@ def convert_file(dst_path, fn, editors):
225227
226228 lines = []
227229
228- with open (rst_fn , 'r' ) as f :
229- prev_line = ''
230- for orig_line in f :
231- line = orig_line .strip ()
232- m = re .match ('^===+\s*$' , line )
233- m2 = re .match ('^---+\s*$' , line )
234- if m or m2 :
235- if prev_line and len (line ) >= 1 + len (prev_line )// 2 and not title :
236- title = prev_line .strip ()
237- lines = lines [:- 1 ]
238- continue
239-
240- m = re .match ('^TAGS:\s*(.*)\s*$' , line )
241- if m :
242- tag_line = m .group (1 ).strip ().replace (';' , ',' )
243- tags .update ([x .strip () for x in tag_line .split ("," )])
244- continue
245-
246- m = re .match ('^AUTHORS:\s*(.*)\s*$' , line )
247- if m :
248- # Author lines override editors
249- if legacy_editors :
250- editors = []
251- legacy_editors = False
252- author_line = m .group (1 ).strip ().replace (';' , ',' )
253- for author in author_line .split ("," ):
254- author = author .strip ()
255- if author and author not in editors :
256- editors .append (author )
257- continue
230+ # Parse and munge HTML
231+ tree = lxml .html .parse (html_fn )
232+ os .unlink (html_fn )
233+
234+ root = tree .getroot ()
235+ head = root .find ('head' )
236+ container , = root .xpath ("//div[@id='notebook-container']" )
237+
238+ headers = container .xpath ('//h1' )
239+ if headers :
240+ title = headers [0 ].text
241+ if isinstance (title , unicode ):
242+ title = title .encode ('utf-8' )
243+ h1_parent = headers [0 ].getparent ()
244+ h1_parent .remove (headers [0 ])
245+
246+ lines .extend ([u".. raw:: html" , u"" ])
247+
248+ for element in head .getchildren ():
249+ if element .tag in ('script' ,):
250+ text = lxml .html .tostring (element )
251+ lines .extend (" " + x for x in text .splitlines ())
252+
253+ text = lxml .html .tostring (container )
254+
255+ m = re .search (ur'<p>TAGS:\s*(.*)\s*</p>' , text )
256+ if m :
257+ tag_line = m .group (1 ).strip ().replace (';' , ',' )
258+ if isinstance (tag_line , unicode ):
259+ tag_line = tag_line .encode ('utf-8' )
260+ tags .update ([x .strip () for x in tag_line .split ("," )])
261+ text = text [:m .start ()] + text [m .end ():]
262+
263+ m = re .search (ur'<p>AUTHORS:\s*(.*)\s*</p>' , text )
264+ if m :
265+ # Author lines override editors
266+ if legacy_editors :
267+ editors = []
268+ legacy_editors = False
269+ author_line = m .group (1 ).strip ().replace (';' , ',' )
270+ if isinstance (author_line , unicode ):
271+ author_line = author_line .encode ('utf-8' )
272+ for author in author_line .split ("," ):
273+ author = author .strip ()
274+ if author and author not in editors :
275+ editors .append (author )
276+
277+ text = text [:m .start ()] + text [m .end ():]
278+
279+ text = text .replace (u'attachments/{0}/' .format (basename ),
280+ u'../_downloads/' )
281+
282+ lines .extend (u" " + x for x in text .splitlines ())
283+ lines .append (u"" )
258284
259- prev_line = line
260- lines .append (orig_line )
261-
262- text = "" .join (lines )
285+ # Produce output
286+ text = u"\n " .join (lines ).encode ('utf-8' )
263287
264288 if not title :
265289 title = basename
266290
267291 authors = ", " .join (editors )
268292 text = "{0}\n {1}\n \n {2}" .format (title , "=" * len (title ), text )
269293
270- text = re .sub (r'`(.*?) <files/(attachments/.*?)>`__' ,
271- r':download:`\1 <\2>`' ,
272- text ,
273- flags = re .M )
274- text = re .sub (r'^TAGS:.*$' , '' , text , flags = re .M )
275- text = re .sub (r'(figure|image):: files/attachments/' , r'\1:: attachments/' , text , flags = re .M )
276- text = re .sub (r' <files/attachments/' , r' <attachments/' , text , flags = re .M )
277- text = re .sub (r'.. parsed-literal::' , r'.. parsed-literal::\n :class: ipy-out' , text , flags = re .M )
278- text = re .sub (r'`([^`<]*)\s+<(?!attachments/)([^:.>]*?)(?:.html)?>`__' , r':doc:`\1 <\2>`' , text , flags = re .M )
279- text = re .sub (r'^(\s*)\.\.\s*raw:: latex' , '\\ 1.. math::\\ 1 :nowrap:' , text , flags = re .M )
280- text = re .sub (r'^(\s*)\.\. code::\s*(ipython3|ipython2|python3|python2|python)?\s*$' , r'\1.. code-block:: python\n' , text , flags = re .M )
281294 with open (rst_fn , 'w' ) as f :
282295 f .write (text )
283296 if authors :
0 commit comments