Skip to content

Commit 9fb694a

Browse files
committed
Automatic commit Fri May 27 05:57:21 PM EEST 2022
1 parent 13b2fba commit 9fb694a

File tree

1 file changed

+87
-25
lines changed

1 file changed

+87
-25
lines changed

epub/epub-show-cover.py

Lines changed: 87 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"opf":"http://www.idpf.org/2007/opf",
1616
"u":"urn:oasis:names:tc:opendocument:xmlns:container",
1717
"xsi":"http://www.w3.org/2001/XMLSchema-instance",
18+
"xhtml":"http://www.w3.org/1999/xhtml"
1819
}
1920

2021

@@ -40,32 +41,90 @@ def get_epub_cover(epub_path):
4041

4142
# We load the "root" file, indicated by the "full-path" attribute of the <rootfile> element in "META-INF/container.xml", using lxml.etree.fromstring():
4243
t = etree.fromstring(z.read(rootfile_path))
43-
# We use xpath() to find the attribute "content":
44-
'''
45-
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
46-
...
47-
<meta content="my-cover-image" name="cover"/>
48-
...
49-
</metadata>
50-
'''
51-
cover_id = t.xpath("//opf:metadata/opf:meta[@name='cover']",
52-
namespaces=namespaces)[0].get("content")
53-
print("ID of cover image found: " + cover_id)
44+
45+
cover_href = None
46+
try:
47+
# For EPUB 2.0, we use xpath() to find a <meta>
48+
# named "cover" and get the attribute "content":
49+
'''
50+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
51+
...
52+
<meta content="my-cover-image" name="cover"/>
53+
...
54+
</metadata> '''
55+
56+
cover_id = t.xpath("//opf:metadata/opf:meta[@name='cover']",
57+
namespaces=namespaces)[0].get("content")
58+
print("ID of cover image found: " + cover_id)
59+
# Next, we use xpath() to find the <item> (in <manifest>) with this id
60+
# and get the attribute "href":
61+
'''
62+
<manifest>
63+
...
64+
<item id="my-cover-image" href="images/978.jpg" ... />
65+
...
66+
</manifest>
67+
'''
68+
cover_href = t.xpath("//opf:manifest/opf:item[@id='" + cover_id + "']",
69+
namespaces=namespaces)[0].get("href")
70+
except IndexError:
71+
pass
5472

55-
# We use xpath() to find the attribute "href":
56-
'''
57-
<manifest>
58-
...
59-
<item id="my-cover-image" href="images/978.jpg" ... />
60-
...
61-
</manifest>
62-
'''
63-
cover_href = t.xpath("//opf:manifest/opf:item[@id='" + cover_id + "']",
64-
namespaces=namespaces)[0].get("href")
65-
# In order to get the full path for the cover image, we have to join rootfile_path and cover_href:
73+
if not cover_href:
74+
# For EPUB 3.0, we use xpath() to find the <item> (in <manifest>) that
75+
# has properties='cover-image' and get the attribute "href":
76+
'''
77+
<manifest>
78+
...
79+
<item href="images/cover.png" id="cover-img" media-type="image/png" properties="cover-image"/>
80+
...
81+
</manifest>
82+
'''
83+
try:
84+
cover_href = t.xpath("//opf:manifest/opf:item[@properties='cover-image']",
85+
namespaces=namespaces)[0].get("href")
86+
except IndexError:
87+
pass
88+
89+
if not cover_href:
90+
# Some EPUB files do not explicitly declare a cover image.
91+
# Instead, they use an "<img src=''>" inside the first XHTML file.
92+
try:
93+
# The <spine> is a list that defines the linear reading order
94+
# of the content documents of the book. The first item in the
95+
# list is the first item in the book.
96+
'''
97+
<spine toc="ncx">
98+
<itemref idref="cover"/>
99+
<itemref idref="nav"/>
100+
<itemref idref="s04"/>
101+
</spine>
102+
'''
103+
cover_page_id = t.xpath("//opf:spine/opf:itemref",
104+
namespaces=namespaces)[0].get("idref")
105+
# Next, we use xpath() to find the item (in manifest) with this id
106+
# and get the attribute "href":
107+
cover_page_href = t.xpath("//opf:manifest/opf:item[@id='" + cover_page_id + "']",
108+
namespaces=namespaces)[0].get("href")
109+
# In order to get the full path for the cover page,
110+
# we have to join rootfile_path and cover_page_href:
111+
cover_page_path = os.path.join(os.path.dirname(rootfile_path), cover_page_href)
112+
print("Path of cover page found: " + cover_page_path)
113+
# We try to find the <img> and get the "src" attribute:
114+
t = etree.fromstring(z.read(cover_page_path))
115+
cover_href = t.xpath("//xhtml:img", namespaces=namespaces)[0].get("src")
116+
except IndexError:
117+
pass
118+
119+
if not cover_href:
120+
print("Cover image not found.")
121+
return None
122+
123+
# In order to get the full path for the cover image,
124+
# we have to join rootfile_path and cover_href:
66125
cover_path = os.path.join(os.path.dirname(rootfile_path), cover_href)
67-
print("Path of cover image found: " + cover_path)
68-
126+
print("Path of cover image found: " + cover_path)
127+
69128
# We return the image
70129
return z.open(cover_path)
71130

@@ -79,7 +138,10 @@ def get_epub_cover(epub_path):
79138
print("File not found: " + epubfile)
80139
exit()
81140

82-
image = Image.open(get_epub_cover(epubfile))
141+
cover = get_epub_cover(epubfile)
142+
if not cover:
143+
exit()
144+
image = Image.open(cover)
83145
image.show()
84146

85147

0 commit comments

Comments
 (0)