11package io .markdom .handler .html .cleaner ;
22
3- import java .util .Iterator ;
3+ import java .util .HashMap ;
44import java .util .List ;
5+ import java .util .Map ;
56import java .util .regex .Pattern ;
67
8+ import org .htmlcleaner .CData ;
79import org .htmlcleaner .CleanerProperties ;
10+ import org .htmlcleaner .ContentNode ;
811import org .htmlcleaner .DoctypeToken ;
912import org .htmlcleaner .HtmlCleaner ;
13+ import org .htmlcleaner .HtmlNode ;
1014import org .htmlcleaner .PrettyHtmlSerializer ;
1115import org .htmlcleaner .Serializer ;
1216import org .htmlcleaner .SimpleHtmlSerializer ;
1317import org .htmlcleaner .TagNode ;
18+ import org .htmlcleaner .TagNodeVisitor ;
1419
1520import io .markdom .handler .html .HtmlDocumentResult ;
1621import io .markdom .util .Attribute ;
1722import io .markdom .util .Attributes ;
1823import io .markdom .util .ObjectHelper ;
1924
20- public final class HtmlCleanerDocumentResult implements HtmlDocumentResult <TagNode , TagNode , List <TagNode >> {
25+ public final class HtmlCleanerDocumentResult implements HtmlDocumentResult <TagNode , TagNode , List <HtmlNode >> {
2126
2227 private final HtmlCleaner cleaner ;
2328
@@ -59,9 +64,7 @@ public TagNode asElement(String tagName, Attributes attributes) {
5964 for (Attribute attribute : attributes ) {
6065 tagNode .addAttribute (attribute .getKey (), attribute .getValue ());
6166 }
62- for (TagNode blockNode : asElements ()) {
63- tagNode .addChild (makeDeepCopy (blockNode ));
64- }
67+ tagNode .addChildren (asElements ());
6568 return tagNode ;
6669 }
6770
@@ -71,28 +74,19 @@ public String asElementText(String tagName, Attributes attributes, boolean prett
7174 }
7275
7376 @ Override
74- public List <TagNode > asElements () {
75- return asDocument ().findElementByName ("body" , true ).getChildTagList ();
77+ @ SuppressWarnings ("unchecked" )
78+ public List <HtmlNode > asElements () {
79+ return (List <HtmlNode >) asDocument ().findElementByName ("body" , true ).getAllChildren ();
7680 }
7781
7882 @ Override
7983 public String asElementsText (boolean pretty ) {
80- StringBuilder builder = new StringBuilder ();
81- Iterator <TagNode > iterator = asElements ().iterator ();
82- if (iterator .hasNext ()) {
83- append (builder , iterator .next (), pretty );
84- while (iterator .hasNext ()) {
85- if (pretty ) {
86- builder .append ("\n " );
87- }
88- append (builder , iterator .next (), pretty );
89- }
84+ String elementText = asElementText ("foo" , pretty );
85+ if (pretty ) {
86+ return elementText .substring ("<foo>\n " .length (), elementText .length () - "\n </foo>" .length ());
87+ } else {
88+ return elementText .substring ("<foo>" .length (), elementText .length () - "</foo>" .length ());
9089 }
91- return builder .toString ();
92- }
93-
94- private void append (StringBuilder builder , TagNode blockNode , boolean pretty ) {
95- builder .append (asText (blockNode , pretty ));
9690 }
9791
9892 private String asText (TagNode tagNode , boolean pretty ) {
@@ -113,11 +107,45 @@ private Serializer selectSerializer(boolean pretty) {
113107 }
114108
115109 private TagNode makeDeepCopy (TagNode tagNode ) {
116- TagNode copyNode = tagNode .makeCopy ();
117- for (TagNode childNode : tagNode .getChildTagList ()) {
118- copyNode .addChild (makeDeepCopy (childNode ));
119- }
120- return tagNode ;
110+
111+ Map <TagNode , TagNode > copyMap = new HashMap <>();
112+
113+ tagNode .traverse (new TagNodeVisitor () {
114+
115+ @ Override
116+ public boolean visit (TagNode parentNode , HtmlNode htmlNode ) {
117+
118+ if (htmlNode instanceof TagNode ) {
119+
120+ TagNode tagNode = (TagNode ) htmlNode ;
121+ TagNode copyNode = tagNode .makeCopy ();
122+ copyMap .put (tagNode , copyNode );
123+ if (null != parentNode ) {
124+ copyMap .get (parentNode ).addChild (copyNode );
125+ }
126+
127+ } else if (htmlNode instanceof CData ) {
128+
129+ CData cdata = (CData ) htmlNode ;
130+ if (null != parentNode ) {
131+ copyMap .get (parentNode ).addChild (new CData (cdata .getContent ()));
132+ }
133+
134+ } else if (htmlNode instanceof ContentNode ) {
135+
136+ ContentNode contentNode = (ContentNode ) htmlNode ;
137+ if (null != parentNode ) {
138+ copyMap .get (parentNode ).addChild (new ContentNode (contentNode .getContent ()));
139+ }
140+
141+ }
142+
143+ return true ;
144+
145+ }
146+ });
147+
148+ return copyMap .get (tagNode );
121149 }
122150
123151}
0 commit comments