15
15
"opf" :"http://www.idpf.org/2007/opf" ,
16
16
"u" :"urn:oasis:names:tc:opendocument:xmlns:container" ,
17
17
"xsi" :"http://www.w3.org/2001/XMLSchema-instance" ,
18
+ "xhtml" :"http://www.w3.org/1999/xhtml"
18
19
}
19
20
20
21
@@ -40,32 +41,90 @@ def get_epub_cover(epub_path):
40
41
41
42
# We load the "root" file, indicated by the "full-path" attribute of "META-INF/container.xml", using lxml.etree.fromstring():
42
43
t = etree .fromstring (z .read (rootfile_path ))
43
- # We use xpath() to find the attribute "content":
44
- '''
45
- <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
46
- ...
47
- <meta content="my-cover-image" name="cover"/>
48
- ...
49
- </metadata>
50
- '''
51
- cover_id = t .xpath ("//opf:metadata/opf:meta[@name='cover']" ,
52
- namespaces = namespaces )[0 ].get ("content" )
53
- print ("ID of cover image found: " + cover_id )
44
+
45
+ cover_href = None
46
+ try :
47
+ # For EPUB 2.0, we use xpath() to find a <meta>
48
+ # named "cover" and get the attribute "content":
49
+ '''
50
+ <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
51
+ ...
52
+ <meta content="my-cover-image" name="cover"/>
53
+ ...
54
+ </metadata> '''
55
+
56
+ cover_id = t .xpath ("//opf:metadata/opf:meta[@name='cover']" ,
57
+ namespaces = namespaces )[0 ].get ("content" )
58
+ print ("ID of cover image found: " + cover_id )
59
+ # Next, we use xpath() to find the <item> (in <manifest>) with this id
60
+ # and get the attribute "href":
61
+ '''
62
+ <manifest>
63
+ ...
64
+ <item id="my-cover-image" href="images/978.jpg" ... />
65
+ ...
66
+ </manifest>
67
+ '''
68
+ cover_href = t .xpath ("//opf:manifest/opf:item[@id='" + cover_id + "']" ,
69
+ namespaces = namespaces )[0 ].get ("href" )
70
+ except IndexError :
71
+ pass
54
72
55
- # We use xpath() to find the attribute "href":
56
- '''
57
- <manifest>
58
- ...
59
- <item id="my-cover-image" href="images/978.jpg" ... />
60
- ...
61
- </manifest>
62
- '''
63
- cover_href = t .xpath ("//opf:manifest/opf:item[@id='" + cover_id + "']" ,
64
- namespaces = namespaces )[0 ].get ("href" )
65
- # In order to get the full path for the cover image, we have to join rootfile_path and cover_href:
73
+ if not cover_href :
74
+ # For EPUB 3.0, we use xpath() to find the <item> (in <manifest>) that
75
+ # has properties='cover-image' and get the attribute "href":
76
+ '''
77
+ <manifest>
78
+ ...
79
+ <item href="images/cover.png" id="cover-img" media-type="image/png" properties="cover-image"/>
80
+ ...
81
+ </manifest>
82
+ '''
83
+ try :
84
+ cover_href = t .xpath ("//opf:manifest/opf:item[@properties='cover-image']" ,
85
+ namespaces = namespaces )[0 ].get ("href" )
86
+ except IndexError :
87
+ pass
88
+
89
+ if not cover_href :
90
+ # Some EPUB files do not explicitly declare a cover image.
91
+ # Instead, they use an "<img src=''>" inside the first xhtml file.
92
+ try :
93
+ # The <spine> is a list that defines the linear reading order
94
+ # of the content documents of the book. The first item in the
95
+ # list is the first item in the book.
96
+ '''
97
+ <spine toc="ncx">
98
+ <itemref idref="cover"/>
99
+ <itemref idref="nav"/>
100
+ <itemref idref="s04"/>
101
+ </spine>
102
+ '''
103
+ cover_page_id = t .xpath ("//opf:spine/opf:itemref" ,
104
+ namespaces = namespaces )[0 ].get ("idref" )
105
+ # Next, we use xpath() to find the item (in manifest) with this id
106
+ # and get the attribute "href":
107
+ cover_page_href = t .xpath ("//opf:manifest/opf:item[@id='" + cover_page_id + "']" ,
108
+ namespaces = namespaces )[0 ].get ("href" )
109
+ # In order to get the full path for the cover page,
110
+ # we have to join rootfile_path and cover_page_href:
111
+ cover_page_path = os .path .join (os .path .dirname (rootfile_path ), cover_page_href )
112
+ print ("Path of cover page found: " + cover_page_path )
113
+ # We try to find the <img> and get the "src" attribute:
114
+ t = etree .fromstring (z .read (cover_page_path ))
115
+ cover_href = t .xpath ("//xhtml:img" , namespaces = namespaces )[0 ].get ("src" )
116
+ except IndexError :
117
+ pass
118
+
119
+ if not cover_href :
120
+ print ("Cover image not found." )
121
+ return None
122
+
123
+ # In order to get the full path for the cover image,
124
+ # we have to join rootfile_path and cover_href:
66
125
cover_path = os .path .join (os .path .dirname (rootfile_path ), cover_href )
67
- print ("Path of cover image found: " + cover_path )
68
-
126
+ print ("Path of cover image found: " + cover_path )
127
+
69
128
# We return the image
70
129
return z .open (cover_path )
71
130
@@ -79,7 +138,10 @@ def get_epub_cover(epub_path):
79
138
print ("File not found: " + epubfile )
80
139
exit ()
81
140
82
- image = Image .open (get_epub_cover (epubfile ))
141
+ cover = get_epub_cover (epubfile )
142
+ if not cover :
143
+ exit ()
144
+ image = Image .open (cover )
83
145
image .show ()
84
146
85
147
0 commit comments