Skip to content

Commit 3eca18e

Browse files
authored
Parse EPUB 2 guide as EPUB 3 landmarks (#628)
1 parent 665d6f8 commit 3eca18e

File tree

7 files changed

+149
-10
lines changed

7 files changed

+149
-10
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ All notable changes to this project will be documented in this file. Take a look
66

77
## [Unreleased]
88

9+
### Added
10+
11+
* The EPUB 2 `<guide>` element is now parsed into the RWPM `landmarks` subcollection when the publication does not provide an EPUB 3 `landmarks` navigation list (contributed by [@erkasraim](https://github.com/readium/kotlin-toolkit/pull/628)).
12+
913
### Changed
1014

1115
* Jetifier is not required anymore, you can remove `android.enableJetifier=true` from your `gradle.properties` if you were using Readium as a local clone.

readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/ManifestAdapter.kt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ internal class ManifestAdapter(
5858
}
5959
}
6060
.mapValues { listOf(PublicationCollection(links = it.value)) }
61+
.toMutableMap()
62+
63+
// EPUB 3 Reading Systems must ignore the guide element when provided in EPUB 3 Publications
64+
// whose EPUB Navigation Document includes the landmarks feature.
65+
// https://idpf.org/epub/30/spec/epub30-publications.html#sec-guide-elem
66+
if (!subcollections.contains("landmarks") && packageDocument.guide.isNotEmpty()) {
67+
// EPUB 2 publications have no landmarks nav, so the guide is used as a fallback.
68+
// An EPUB 3 publication that declares no landmarks nav also falls back to its guide, if any.
69+
subcollections["landmarks"] = listOf(PublicationCollection(links = packageDocument.guide))
70+
}
6171

6272
// Build Publication object
6373
return Manifest(
@@ -66,7 +76,7 @@ internal class ManifestAdapter(
6676
readingOrder = readingOrder,
6777
resources = resources,
6878
tableOfContents = toc,
69-
subcollections = subcollections
79+
subcollections = subcollections.toMap()
7080
)
7181
}
7282
}

readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/NavigationDocumentParser.kt

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ internal object NavigationDocumentParser {
5252
DEFAULT_VOCAB.TYPE
5353
)
5454
}
55-
val links = nav.getFirst("ol", Namespaces.XHTML)?.let { parseOlElement(it, filePath) }
55+
val links = nav.getFirst("ol", Namespaces.XHTML)?.let { parseOlElement(it, filePath, prefixMap) }
5656
return if (types.isNotEmpty() && !links.isNullOrEmpty()) Pair(types, links) else null
5757
}
5858

59-
private fun parseOlElement(element: ElementNode, filePath: Url): List<Link> =
60-
element.get("li", Namespaces.XHTML).mapNotNull { parseLiElement(it, filePath) }
59+
private fun parseOlElement(element: ElementNode, filePath: Url, prefixMap: Map<String, String>): List<Link> =
60+
element.get("li", Namespaces.XHTML).mapNotNull { parseLiElement(it, filePath, prefixMap) }
6161

6262
@OptIn(DelicateReadiumApi::class)
63-
private fun parseLiElement(element: ElementNode, filePath: Url): Link? {
63+
private fun parseLiElement(element: ElementNode, filePath: Url, prefixMap: Map<String, String>): Link? {
6464
val first = element.getAll().firstOrNull() ?: return null // should be <a>, <span>, or <ol>
6565
val title = if (first.name == "ol") {
6666
""
@@ -76,15 +76,35 @@ internal object NavigationDocumentParser {
7676
} else {
7777
Url("#")!!
7878
}
79-
val children = element.getFirst("ol", Namespaces.XHTML)?.let { parseOlElement(it, filePath) }.orEmpty()
79+
val children = element.getFirst("ol", Namespaces.XHTML)?.let {
80+
parseOlElement(
81+
it,
82+
filePath,
83+
prefixMap
84+
)
85+
}.orEmpty()
86+
87+
val typeAttr = first.getAttrNs("type", Namespaces.OPS) ?: ""
88+
val rels = if (typeAttr.isNotEmpty()) {
89+
parseProperties(typeAttr).map {
90+
resolveProperty(
91+
it,
92+
prefixMap,
93+
DEFAULT_VOCAB.TYPE
94+
)
95+
}.toSet()
96+
} else {
97+
emptySet()
98+
}
8099

81100
return if (children.isEmpty() && (href.toString() == "#" || title == "")) {
82101
null
83102
} else {
84103
Link(
85104
title = title,
86105
href = href,
87-
children = children
106+
children = children,
107+
rels = rels
88108
)
89109
}
90110
}

readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/PackageDocument.kt

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
package org.readium.r2.streamer.parser.epub
1010

1111
import org.readium.r2.shared.InternalReadiumApi
12+
import org.readium.r2.shared.publication.Link
1213
import org.readium.r2.shared.publication.ReadingProgression
1314
import org.readium.r2.shared.util.Url
1415
import org.readium.r2.shared.util.fromEpubHref
@@ -21,6 +22,7 @@ internal data class PackageDocument(
2122
val metadata: List<MetadataItem>,
2223
val manifest: List<Item>,
2324
val spine: Spine,
25+
val guide: List<Link>,
2426
) {
2527

2628
companion object {
@@ -34,6 +36,7 @@ internal data class PackageDocument(
3436
?: return null
3537
val spineElement = document.getFirst("spine", Namespaces.OPF)
3638
?: return null
39+
val guideElement = document.getFirst("guide", Namespaces.OPF)
3740

3841
return PackageDocument(
3942
path = filePath,
@@ -42,7 +45,8 @@ internal data class PackageDocument(
4245
metadata = metadata,
4346
manifest = manifestElement.get("item", Namespaces.OPF)
4447
.mapNotNull { Item.parse(it, filePath, prefixMap) },
45-
spine = Spine.parse(spineElement, prefixMap, epubVersion)
48+
spine = Spine.parse(spineElement, prefixMap, epubVersion),
49+
guide = Guide.parse(guideElement, filePath, prefixMap),
4650
)
4751
}
4852
}
@@ -106,6 +110,46 @@ internal data class Spine(
106110
}
107111
}
108112

113+
internal data class Guide(
114+
val links: List<Link>,
115+
) {
116+
companion object {
117+
// The guide element is an EPUB 2 feature; EPUB 3 deprecates it in favor of the landmarks nav.
118+
// https://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#TOC2.6
119+
fun parse(element: ElementNode?, filePath: Url, prefixMap: Map<String, String>): List<Link> {
120+
if (element == null) return emptyList()
121+
122+
return element.get("reference", Namespaces.OPF).mapNotNull { node ->
123+
val href = node.getAttr("href")
124+
?.let { Url.fromEpubHref(it) }
125+
?.let { filePath.resolve(it) }
126+
?: return@mapNotNull null
127+
val rels = node.getAttr("type")?.let {
128+
setOf(mapToEPUB3Spec(it, prefixMap))
129+
} ?: emptySet()
130+
131+
Link(
132+
href = href,
133+
title = node.getAttr("title"),
134+
rels = rels,
135+
)
136+
}
137+
}
138+
139+
private fun mapToEPUB3Spec(type: String, prefixMap: Map<String, String>): String {
140+
return when (type) {
141+
"title-page" -> "titlepage"
142+
"text" -> "bodymatter"
143+
"acknowledgements" -> "acknowledgments" // American English
144+
"notes" -> "endnotes" // endnotes or footnotes. https://www.w3.org/TR/epub-ssv-11/#notes
145+
else -> type
146+
}.let {
147+
resolveProperty(it, prefixMap, DEFAULT_VOCAB.TYPE)
148+
}
149+
}
150+
}
151+
}
152+
109153
internal data class Itemref(
110154
val idref: String,
111155
val linear: Boolean,

readium/streamer/src/test/java/org/readium/r2/streamer/parser/epub/NavigationDocumentParserTest.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ class NavigationDocumentParserTest {
135135
@Test
136136
fun `landmarks are rightly parsed`() {
137137
assertThat(navComplex["landmarks"]).containsExactly(
138-
Link(title = "Table of Contents", href = Href("OEBPS/xhtml/nav.xhtml#toc")!!),
139-
Link(title = "Begin Reading", href = Href("OEBPS/xhtml/chapter1.xhtml")!!)
138+
Link(title = "Table of Contents", href = Href("OEBPS/xhtml/nav.xhtml#toc")!!, rels = setOf("http://idpf.org/epub/vocab/structure/#toc")),
139+
Link(title = "Begin Reading", href = Href("OEBPS/xhtml/chapter1.xhtml")!!, rels = setOf("http://idpf.org/epub/vocab/structure/#bodymatter"))
140140
)
141141
}
142142

readium/streamer/src/test/java/org/readium/r2/streamer/parser/epub/PackageDocumentTest.kt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@
1212
package org.readium.r2.streamer.parser.epub
1313

1414
import org.assertj.core.api.Assertions.assertThat
15+
import org.assertj.core.api.Assertions.entry
1516
import org.junit.Test
1617
import org.junit.runner.RunWith
1718
import org.readium.r2.shared.InternalReadiumApi
1819
import org.readium.r2.shared.publication.Href
1920
import org.readium.r2.shared.publication.Link
2021
import org.readium.r2.shared.publication.Manifest
22+
import org.readium.r2.shared.publication.PublicationCollection
2123
import org.readium.r2.shared.publication.ReadingProgression
2224
import org.readium.r2.shared.publication.epub.EpubLayout
2325
import org.readium.r2.shared.publication.epub.contains
@@ -231,3 +233,38 @@ class LinkMiscTest {
231233
parsePackageDocument("package/fallbacks-termination.opf")
232234
}
233235
}
236+
237+
@RunWith(RobolectricTestRunner::class)
238+
class GuideTest {
239+
private val guidePub = parsePackageDocument("package/guide-epub2.opf")
240+
241+
@Test
242+
fun `Guide is rightly computed`() {
243+
assertThat(guidePub.subcollections).containsExactly(
244+
entry(
245+
"landmarks",
246+
listOf(
247+
PublicationCollection(
248+
links = listOf(
249+
Link(
250+
href = Href("OEBPS/toc.html")!!,
251+
title = "Table of Contents",
252+
rels = setOf("http://idpf.org/epub/vocab/structure/#toc")
253+
),
254+
Link(
255+
href = Href("OEBPS/toc.html#figures")!!,
256+
title = "List Of Illustrations",
257+
rels = setOf("http://idpf.org/epub/vocab/structure/#loi")
258+
),
259+
Link(
260+
href = Href("OEBPS/beginpage.html")!!,
261+
title = "Introduction",
262+
rels = setOf("http://idpf.org/epub/vocab/structure/#bodymatter")
263+
),
264+
)
265+
)
266+
)
267+
)
268+
)
269+
}
270+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?xml version="1.0"?>
2+
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="pub-id" version="2.0">
3+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
4+
<dc:title>Alice's Adventures in Wonderland</dc:title>
5+
<meta name="cover" content="cover" />
6+
</metadata>
7+
<manifest>
8+
<item id="cover" href="cover.jpg" media-type="image/jpeg" />
9+
<item id="titlepage" href="titlepage.xhtml"/>
10+
<item id="beginpage" href="beginpage.xhtml"/>
11+
</manifest>
12+
<spine>
13+
<itemref idref="titlepage"/>
14+
<itemref idref="beginpage"/>
15+
</spine>
16+
<guide>
17+
<reference type="toc" title="Table of Contents"
18+
href="toc.html" />
19+
<reference type="loi" title="List Of Illustrations"
20+
href="toc.html#figures" />
21+
<reference type="text" title="Introduction"
22+
href="beginpage.html" />
23+
</guide>
24+
</package>

0 commit comments

Comments
 (0)