Skip to content

Commit 919d8fc

Browse files
author
Torsten Krause
committed
Allow text nodes to represent Markdom blocks
1 parent c593e4a commit 919d8fc

File tree

2 files changed

+66
-37
lines changed

2 files changed

+66
-37
lines changed

module/handler/html-cleaner/src/main/java/io/markdom/handler/html/cleaner/HtmlCleanerDocumentResult.java

Lines changed: 55 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,28 @@
11
package io.markdom.handler.html.cleaner;
22

3-
import java.util.Iterator;
3+
import java.util.HashMap;
44
import java.util.List;
5+
import java.util.Map;
56
import java.util.regex.Pattern;
67

8+
import org.htmlcleaner.CData;
79
import org.htmlcleaner.CleanerProperties;
10+
import org.htmlcleaner.ContentNode;
811
import org.htmlcleaner.DoctypeToken;
912
import org.htmlcleaner.HtmlCleaner;
13+
import org.htmlcleaner.HtmlNode;
1014
import org.htmlcleaner.PrettyHtmlSerializer;
1115
import org.htmlcleaner.Serializer;
1216
import org.htmlcleaner.SimpleHtmlSerializer;
1317
import org.htmlcleaner.TagNode;
18+
import org.htmlcleaner.TagNodeVisitor;
1419

1520
import io.markdom.handler.html.HtmlDocumentResult;
1621
import io.markdom.util.Attribute;
1722
import io.markdom.util.Attributes;
1823
import io.markdom.util.ObjectHelper;
1924

20-
public final class HtmlCleanerDocumentResult implements HtmlDocumentResult<TagNode, TagNode, List<TagNode>> {
25+
public final class HtmlCleanerDocumentResult implements HtmlDocumentResult<TagNode, TagNode, List<HtmlNode>> {
2126

2227
private final HtmlCleaner cleaner;
2328

@@ -59,9 +64,7 @@ public TagNode asElement(String tagName, Attributes attributes) {
5964
for (Attribute attribute : attributes) {
6065
tagNode.addAttribute(attribute.getKey(), attribute.getValue());
6166
}
62-
for (TagNode blockNode : asElements()) {
63-
tagNode.addChild(makeDeepCopy(blockNode));
64-
}
67+
tagNode.addChildren(asElements());
6568
return tagNode;
6669
}
6770

@@ -71,28 +74,19 @@ public String asElementText(String tagName, Attributes attributes, boolean prett
7174
}
7275

7376
@Override
74-
public List<TagNode> asElements() {
75-
return asDocument().findElementByName("body", true).getChildTagList();
77+
@SuppressWarnings("unchecked")
78+
public List<HtmlNode> asElements() {
79+
return (List<HtmlNode>) asDocument().findElementByName("body", true).getAllChildren();
7680
}
7781

7882
@Override
7983
public String asElementsText(boolean pretty) {
80-
StringBuilder builder = new StringBuilder();
81-
Iterator<TagNode> iterator = asElements().iterator();
82-
if (iterator.hasNext()) {
83-
append(builder, iterator.next(), pretty);
84-
while (iterator.hasNext()) {
85-
if (pretty) {
86-
builder.append("\n");
87-
}
88-
append(builder, iterator.next(), pretty);
89-
}
84+
String elementText = asElementText("foo", pretty);
85+
if (pretty) {
86+
return elementText.substring("<foo>\n".length(), elementText.length() - "\n</foo>".length());
87+
} else {
88+
return elementText.substring("<foo>".length(), elementText.length() - "</foo>".length());
9089
}
91-
return builder.toString();
92-
}
93-
94-
private void append(StringBuilder builder, TagNode blockNode, boolean pretty) {
95-
builder.append(asText(blockNode, pretty));
9690
}
9791

9892
private String asText(TagNode tagNode, boolean pretty) {
@@ -113,11 +107,45 @@ private Serializer selectSerializer(boolean pretty) {
113107
}
114108

115109
private TagNode makeDeepCopy(TagNode tagNode) {
116-
TagNode copyNode = tagNode.makeCopy();
117-
for (TagNode childNode : tagNode.getChildTagList()) {
118-
copyNode.addChild(makeDeepCopy(childNode));
119-
}
120-
return tagNode;
110+
111+
Map<TagNode, TagNode> copyMap = new HashMap<>();
112+
113+
tagNode.traverse(new TagNodeVisitor() {
114+
115+
@Override
116+
public boolean visit(TagNode parentNode, HtmlNode htmlNode) {
117+
118+
if (htmlNode instanceof TagNode) {
119+
120+
TagNode tagNode = (TagNode) htmlNode;
121+
TagNode copyNode = tagNode.makeCopy();
122+
copyMap.put(tagNode, copyNode);
123+
if (null != parentNode) {
124+
copyMap.get(parentNode).addChild(copyNode);
125+
}
126+
127+
} else if (htmlNode instanceof CData) {
128+
129+
CData cdata = (CData) htmlNode;
130+
if (null != parentNode) {
131+
copyMap.get(parentNode).addChild(new CData(cdata.getContent()));
132+
}
133+
134+
} else if (htmlNode instanceof ContentNode) {
135+
136+
ContentNode contentNode = (ContentNode) htmlNode;
137+
if (null != parentNode) {
138+
copyMap.get(parentNode).addChild(new ContentNode(contentNode.getContent()));
139+
}
140+
141+
}
142+
143+
return true;
144+
145+
}
146+
});
147+
148+
return copyMap.get(tagNode);
121149
}
122150

123151
}

module/handler/html-jsoup/src/main/java/io/markdom/handler/html/jsoup/JsoupHtmlDocumentResult.java

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,18 @@
11
package io.markdom.handler.html.jsoup;
22

33
import java.util.Iterator;
4+
import java.util.List;
45

56
import org.jsoup.nodes.Document;
67
import org.jsoup.nodes.Element;
7-
import org.jsoup.select.Elements;
8+
import org.jsoup.nodes.Node;
89

910
import io.markdom.handler.html.HtmlDocumentResult;
1011
import io.markdom.util.Attribute;
1112
import io.markdom.util.Attributes;
1213
import io.markdom.util.ObjectHelper;
1314

14-
public final class JsoupHtmlDocumentResult implements HtmlDocumentResult<Document, Element, Elements> {
15+
public final class JsoupHtmlDocumentResult implements HtmlDocumentResult<Document, Element, List<Node>> {
1516

1617
private final Document document;
1718

@@ -38,8 +39,8 @@ public Element asElement(String tagName, Attributes attributes) {
3839
for (Attribute attribute : attributes) {
3940
element.attr(attribute.getKey(), attribute.getValue());
4041
}
41-
for (Element blockElement : asElements()) {
42-
element.appendChild(blockElement.clone());
42+
for (Node blockNode : asElements()) {
43+
element.appendChild(blockNode.clone());
4344
}
4445
return element;
4546
}
@@ -52,14 +53,14 @@ public String asElementText(String tagName, Attributes attributes, boolean prett
5253
}
5354

5455
@Override
55-
public Elements asElements() {
56-
return asDocument().body().children();
56+
public List<Node> asElements() {
57+
return asDocument().body().childNodes();
5758
}
5859

5960
@Override
6061
public String asElementsText(boolean pretty) {
6162
StringBuilder builder = new StringBuilder();
62-
Iterator<Element> iterator = asElements().iterator();
63+
Iterator<Node> iterator = asElements().iterator();
6364
if (iterator.hasNext()) {
6465
append(builder, iterator.next(), pretty);
6566
while (iterator.hasNext()) {
@@ -72,9 +73,9 @@ public String asElementsText(boolean pretty) {
7273
return builder.toString();
7374
}
7475

75-
private void append(StringBuilder builder, Element blockElement, boolean pretty) {
76-
blockElement.ownerDocument().outputSettings().prettyPrint(pretty);
77-
builder.append(blockElement.outerHtml());
76+
private void append(StringBuilder builder, Node blockNode, boolean pretty) {
77+
blockNode.ownerDocument().outputSettings().prettyPrint(pretty);
78+
builder.append(blockNode.outerHtml());
7879
}
7980

8081
}

0 commit comments

Comments
 (0)