Automatic commit Fri May 27 05:57:21 PM EEST 2022

Alamot · Alamot · commit 9fb694a2de33 · 2022-05-27T17:57:22.000+03:00
diff --git a/epub/epub-show-cover.py b/epub/epub-show-cover.py
@@ -15,6 +15,7 @@
    "opf":"http://www.idpf.org/2007/opf",
    "u":"urn:oasis:names:tc:opendocument:xmlns:container",
    "xsi":"http://www.w3.org/2001/XMLSchema-instance",
+   "xhtml":"http://www.w3.org/1999/xhtml"
 }
 
 
@@ -40,32 +41,90 @@ def get_epub_cover(epub_path):
         
         # We load the "root" file, indicated by the "full_path" attribute of "META-INF/container.xml", using lxml.etree.fromString():
         t = etree.fromstring(z.read(rootfile_path))
-        # We use xpath() to find the attribute "content":
-        '''
-        <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
-          ...
-          <meta content="my-cover-image" name="cover"/>
-          ...
-        </metadata>
-        '''
-        cover_id = t.xpath("//opf:metadata/opf:meta[@name='cover']",
-                                    namespaces=namespaces)[0].get("content")
-        print("ID of cover image found: " + cover_id)
+
+        cover_href = None
+        try:
+            # For EPUB 2.0, we use xpath() to find a <meta> 
+            # named "cover" and get the attribute "content":
+            '''
+            <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
+              ...
+              <meta content="my-cover-image" name="cover"/>
+              ...
+            </metadata>            '''
+
+            cover_id = t.xpath("//opf:metadata/opf:meta[@name='cover']",
+                                      namespaces=namespaces)[0].get("content")
+            print("ID of cover image found: " + cover_id)
+            # Next, we use xpath() to find the <item> (in <manifest>) with this id 
+            # and get the attribute "href":
+            '''
+            <manifest>
+                ...
+                <item id="my-cover-image" href="images/978.jpg" ... />
+                ... 
+            </manifest>
+            '''
+            cover_href = t.xpath("//opf:manifest/opf:item[@id='" + cover_id + "']",
+                                 namespaces=namespaces)[0].get("href")
+        except IndexError:
+            pass
         
-        # We use xpath() to find the attribute "href":
-        '''
-        <manifest>
-            ...
-            <item id="my-cover-image" href="images/978.jpg" ... />
-            ... 
-        </manifest>
-        '''
-        cover_href = t.xpath("//opf:manifest/opf:item[@id='" + cover_id + "']",
-                                         namespaces=namespaces)[0].get("href")
-        # In order to get the full path for the cover image, we have to join rootfile_path and cover_href:
+        if not cover_href:
+            # For EPUB 3.0, We use xpath to find the <item> (in <manifest>) that
+            # has properties='cover-image' and get the attribute "href":
+            '''
+            <manifest>
+              ...
+              <item href="images/cover.png" id="cover-img" media-type="image/png" properties="cover-image"/>
+              ...
+            </manifest>
+            '''
+            try:
+                cover_href = t.xpath("//opf:manifest/opf:item[@properties='cover-image']",
+                                     namespaces=namespaces)[0].get("href")
+            except IndexError:
+                pass
+
+        if not cover_href:
+            # Some EPUB files do not declare explicitly a cover image.
+            # Instead, they use an "<img src=''>" inside the first xhmtl file.
+            try:
+                # The <spine> is a list that defines the linear reading order
+                # of the content documents of the book. The first item in the  
+                # list is the first item in the book.  
+                '''
+                <spine toc="ncx">
+                  <itemref idref="cover"/>
+                  <itemref idref="nav"/>
+                  <itemref idref="s04"/>
+                </spine>
+                '''
+                cover_page_id = t.xpath("//opf:spine/opf:itemref",
+                                        namespaces=namespaces)[0].get("idref")
+                # Next, we use xpath() to find the item (in manifest) with this id 
+                # and get the attribute "href":
+                cover_page_href = t.xpath("//opf:manifest/opf:item[@id='" + cover_page_id + "']",
+                                          namespaces=namespaces)[0].get("href")
+                # In order to get the full path for the cover page,
+                # we have to join rootfile_path and cover_page_href:
+                cover_page_path = os.path.join(os.path.dirname(rootfile_path), cover_page_href)
+                print("Path of cover page found: " + cover_page_path)     
+                # We try to find the <img> and get the "src" attribute:
+                t = etree.fromstring(z.read(cover_page_path))              
+                cover_href = t.xpath("//xhtml:img", namespaces=namespaces)[0].get("src")
+            except IndexError:
+                pass
+
+        if not cover_href:
+            print("Cover image not found.")  
+            return None
+
+        # In order to get the full path for the cover image,
+        # we have to join rootfile_path and cover_href:
         cover_path = os.path.join(os.path.dirname(rootfile_path), cover_href)
-        print("Path of cover image found: " + cover_path)
-        
+        print("Path of cover image found: " + cover_path)                
+                
         # We return the image
         return z.open(cover_path)
 
@@ -79,7 +138,10 @@ def get_epub_cover(epub_path):
     print("File not found: " + epubfile)
     exit()
 
-image = Image.open(get_epub_cover(epubfile))
+cover = get_epub_cover(epubfile)
+if not cover:
+    exit()
+image = Image.open(cover)
 image.show()