Skip to content

Commit 67e1f94

Browse files
author
Malo Jaffré
committed
Do not delete content of a non-empty cell.
When using `add_html_to_cell`, the cell's first paragraph should be deleted only if it is genuinely unwanted, i.e. only if it is the empty paragraph that was created automatically along with the cell. If that paragraph already contains text, it is kept.
1 parent 831c708 commit 67e1f94

File tree

2 files changed

+194
-188
lines changed

2 files changed

+194
-188
lines changed

htmldocx/h2d.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,8 @@ def add_html_to_cell(self, html, cell):
594594
if not isinstance(cell, docx.table._Cell):
595595
raise ValueError('Second argument needs to be a %s' % docx.table._Cell)
596596
unwanted_paragraph = cell.paragraphs[0]
597-
delete_paragraph(unwanted_paragraph)
597+
if unwanted_paragraph.text == "":
598+
delete_paragraph(unwanted_paragraph)
598599
self.set_initial_attrs(cell)
599600
self.run_process(html)
600601
# cells must end with a paragraph or will get message about corrupt file

tests/test.py

Lines changed: 192 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -1,187 +1,192 @@
1-
import os
2-
from pathlib import Path
3-
import unittest
4-
from docx import Document
5-
from .context import HtmlToDocx, test_dir
6-
7-
class OutputTest(unittest.TestCase):
8-
9-
@staticmethod
10-
def get_html_from_file(filename: str):
11-
file_path = Path(test_dir) / Path(filename)
12-
with open(file_path, 'r') as f:
13-
html = f.read()
14-
return html
15-
16-
@classmethod
17-
def setUpClass(cls):
18-
cls.document = Document()
19-
cls.text1 = cls.get_html_from_file('text1.html')
20-
cls.table_html = cls.get_html_from_file('tables1.html')
21-
cls.table2_html = cls.get_html_from_file('tables2.html')
22-
23-
@classmethod
24-
def tearDownClass(cls):
25-
outputpath = os.path.join(test_dir, 'test.docx')
26-
cls.document.save(outputpath)
27-
28-
def setUp(self):
29-
self.parser = HtmlToDocx()
30-
31-
def test_html_with_images_links_style(self):
32-
self.document.add_heading(
33-
'Test: add regular html with images, links and some formatting to document',
34-
level=1
35-
)
36-
self.parser.add_html_to_document(self.text1, self.document)
37-
38-
def test_add_html_to_table_cell(self):
39-
self.document.add_heading(
40-
'Test: regular html with images, links, some formatting to table cell',
41-
level=1
42-
)
43-
table = self.document.add_table(2,2, style='Table Grid')
44-
cell = table.cell(1,1)
45-
self.parser.add_html_to_document(self.text1, cell)
46-
47-
def test_add_html_skip_images(self):
48-
self.document.add_heading(
49-
'Test: regular html with images, but skip adding images',
50-
level=1
51-
)
52-
self.parser.options['images'] = False
53-
self.parser.add_html_to_document(self.text1, self.document)
54-
55-
def test_add_html_with_tables(self):
56-
self.document.add_heading(
57-
'Test: add html with tables',
58-
level=1
59-
)
60-
self.parser.add_html_to_document(self.table_html, self.document)
61-
62-
def test_add_html_with_tables_accent_style(self):
63-
self.document.add_heading(
64-
'Test: add html with tables with accent',
65-
)
66-
self.parser.table_style = 'Light Grid Accent 6'
67-
self.parser.add_html_to_document(self.table_html, self.document)
68-
69-
def test_add_html_with_tables_basic_style(self):
70-
self.document.add_heading(
71-
'Test: add html with tables with basic style',
72-
)
73-
self.parser.table_style = 'TableGrid'
74-
self.parser.add_html_to_document(self.table_html, self.document)
75-
76-
def test_add_nested_tables(self):
77-
self.document.add_heading(
78-
'Test: add nested tables',
79-
)
80-
self.parser.add_html_to_document(self.table2_html, self.document)
81-
82-
def test_add_nested_tables_basic_style(self):
83-
self.document.add_heading(
84-
'Test: add nested tables with basic style',
85-
)
86-
self.parser.table_style = 'TableGrid'
87-
self.parser.add_html_to_document(self.table2_html, self.document)
88-
89-
def test_add_nested_tables_accent_style(self):
90-
self.document.add_heading(
91-
'Test: add nested tables with accent style',
92-
)
93-
self.parser.table_style = 'Light Grid Accent 6'
94-
self.parser.add_html_to_document(self.table2_html, self.document)
95-
96-
def test_add_html_skip_tables(self):
97-
# broken until feature readded
98-
self.document.add_heading(
99-
'Test: add html with tables, but skip adding tables',
100-
level=1
101-
)
102-
self.parser.options['tables'] = False
103-
self.parser.add_html_to_document(self.table_html, self.document)
104-
105-
def test_wrong_argument_type_raises_error(self):
106-
try:
107-
self.parser.add_html_to_document(self.document, self.text1)
108-
except Exception as e:
109-
assert isinstance(e, ValueError)
110-
assert "First argument needs to be a <class 'str'>" in str(e)
111-
else:
112-
assert False, "Error not raised as expected"
113-
114-
try:
115-
self.parser.add_html_to_document(self.text1, self.text1)
116-
except Exception as e:
117-
assert isinstance(e, ValueError)
118-
assert "Second argument" in str(e)
119-
assert "<class 'docx.document.Document'>" in str(e)
120-
else:
121-
assert False, "Error not raised as expected"
122-
123-
def test_add_html_to_cells_method(self):
124-
self.document.add_heading(
125-
'Test: add_html_to_cells method',
126-
level=1
127-
)
128-
table = self.document.add_table(2,2, style='Table Grid')
129-
cell = table.cell(0,0)
130-
html = '''Line 0 without p tags<p>Line 1 with P tags</p>'''
131-
self.parser.add_html_to_cell(html, cell)
132-
133-
cell = table.cell(0,1)
134-
html = '''<p>Line 0 with p tags</p>Line 1 without p tags'''
135-
self.parser.add_html_to_cell(html, cell)
136-
137-
def test_inline_code(self):
138-
self.document.add_heading(
139-
'Test: inline code block',
140-
level=1
141-
)
142-
143-
html = "<p>This is a sentence that contains <code>some code elements</code> that " \
144-
"should appear as code.</p>"
145-
self.parser.add_html_to_document(html, self.document)
146-
147-
def test_code_block(self):
148-
self.document.add_heading(
149-
'Test: code block',
150-
level=1
151-
)
152-
153-
html = """<p><code>
154-
This is a code block.
155-
That should be NOT be pre-formatted.
156-
It should NOT retain carriage returns,
157-
158-
or blank lines.
159-
</code></p>"""
160-
self.parser.add_html_to_document(html, self.document)
161-
162-
def test_pre_block(self):
163-
self.document.add_heading(
164-
'Test: pre block',
165-
level=1
166-
)
167-
168-
html = """<pre>
169-
This is a pre-formatted block.
170-
That should be pre-formatted.
171-
Retaining any carriage returns,
172-
173-
and blank lines.
174-
</pre>
175-
"""
176-
self.parser.add_html_to_document(html, self.document)
177-
178-
def test_handling_hr(self):
179-
self.document.add_heading(
180-
'Test: Handling of hr',
181-
level=1
182-
)
183-
self.parser.add_html_to_document("<p>paragraph</p><hr><p>paragraph</p>", self.document)
184-
185-
186-
if __name__ == '__main__':
187-
unittest.main()
1+
import os
2+
from pathlib import Path
3+
import unittest
4+
from docx import Document
5+
from .context import HtmlToDocx, test_dir
6+
7+
class OutputTest(unittest.TestCase):
8+
9+
@staticmethod
10+
def get_html_from_file(filename: str):
11+
file_path = Path(test_dir) / Path(filename)
12+
with open(file_path, 'r') as f:
13+
html = f.read()
14+
return html
15+
16+
@classmethod
17+
def setUpClass(cls):
18+
cls.document = Document()
19+
cls.text1 = cls.get_html_from_file('text1.html')
20+
cls.table_html = cls.get_html_from_file('tables1.html')
21+
cls.table2_html = cls.get_html_from_file('tables2.html')
22+
23+
@classmethod
24+
def tearDownClass(cls):
25+
outputpath = os.path.join(test_dir, 'test.docx')
26+
cls.document.save(outputpath)
27+
28+
def setUp(self):
29+
self.parser = HtmlToDocx()
30+
31+
def test_html_with_images_links_style(self):
32+
self.document.add_heading(
33+
'Test: add regular html with images, links and some formatting to document',
34+
level=1
35+
)
36+
self.parser.add_html_to_document(self.text1, self.document)
37+
38+
def test_add_html_to_table_cell(self):
39+
self.document.add_heading(
40+
'Test: regular html with images, links, some formatting to table cell',
41+
level=1
42+
)
43+
table = self.document.add_table(2,2, style='Table Grid')
44+
cell = table.cell(1,1)
45+
self.parser.add_html_to_document(self.text1, cell)
46+
47+
def test_add_html_skip_images(self):
48+
self.document.add_heading(
49+
'Test: regular html with images, but skip adding images',
50+
level=1
51+
)
52+
self.parser.options['images'] = False
53+
self.parser.add_html_to_document(self.text1, self.document)
54+
55+
def test_add_html_with_tables(self):
56+
self.document.add_heading(
57+
'Test: add html with tables',
58+
level=1
59+
)
60+
self.parser.add_html_to_document(self.table_html, self.document)
61+
62+
def test_add_html_with_tables_accent_style(self):
63+
self.document.add_heading(
64+
'Test: add html with tables with accent',
65+
)
66+
self.parser.table_style = 'Light Grid Accent 6'
67+
self.parser.add_html_to_document(self.table_html, self.document)
68+
69+
def test_add_html_with_tables_basic_style(self):
70+
self.document.add_heading(
71+
'Test: add html with tables with basic style',
72+
)
73+
self.parser.table_style = 'TableGrid'
74+
self.parser.add_html_to_document(self.table_html, self.document)
75+
76+
def test_add_nested_tables(self):
77+
self.document.add_heading(
78+
'Test: add nested tables',
79+
)
80+
self.parser.add_html_to_document(self.table2_html, self.document)
81+
82+
def test_add_nested_tables_basic_style(self):
83+
self.document.add_heading(
84+
'Test: add nested tables with basic style',
85+
)
86+
self.parser.table_style = 'TableGrid'
87+
self.parser.add_html_to_document(self.table2_html, self.document)
88+
89+
def test_add_nested_tables_accent_style(self):
90+
self.document.add_heading(
91+
'Test: add nested tables with accent style',
92+
)
93+
self.parser.table_style = 'Light Grid Accent 6'
94+
self.parser.add_html_to_document(self.table2_html, self.document)
95+
96+
def test_add_html_skip_tables(self):
97+
# broken until feature readded
98+
self.document.add_heading(
99+
'Test: add html with tables, but skip adding tables',
100+
level=1
101+
)
102+
self.parser.options['tables'] = False
103+
self.parser.add_html_to_document(self.table_html, self.document)
104+
105+
def test_wrong_argument_type_raises_error(self):
106+
try:
107+
self.parser.add_html_to_document(self.document, self.text1)
108+
except Exception as e:
109+
assert isinstance(e, ValueError)
110+
assert "First argument needs to be a <class 'str'>" in str(e)
111+
else:
112+
assert False, "Error not raised as expected"
113+
114+
try:
115+
self.parser.add_html_to_document(self.text1, self.text1)
116+
except Exception as e:
117+
assert isinstance(e, ValueError)
118+
assert "Second argument" in str(e)
119+
assert "<class 'docx.document.Document'>" in str(e)
120+
else:
121+
assert False, "Error not raised as expected"
122+
123+
def test_add_html_to_cells_method(self):
124+
self.document.add_heading(
125+
'Test: add_html_to_cells method',
126+
level=1
127+
)
128+
table = self.document.add_table(2, 3, style='Table Grid')
129+
cell = table.cell(0, 0)
130+
html = '''Line 0 without p tags<p>Line 1 with P tags</p>'''
131+
self.parser.add_html_to_cell(html, cell)
132+
133+
cell = table.cell(0, 1)
134+
html = '''<p>Line 0 with p tags</p>Line 1 without p tags'''
135+
self.parser.add_html_to_cell(html, cell)
136+
137+
cell = table.cell(0, 2)
138+
cell.text = "Pre-defined text that shouldn't be removed."
139+
html = '''<p>Add HTML to non-empty cell.</p>'''
140+
self.parser.add_html_to_cell(html, cell)
141+
142+
def test_inline_code(self):
143+
self.document.add_heading(
144+
'Test: inline code block',
145+
level=1
146+
)
147+
148+
html = "<p>This is a sentence that contains <code>some code elements</code> that " \
149+
"should appear as code.</p>"
150+
self.parser.add_html_to_document(html, self.document)
151+
152+
def test_code_block(self):
153+
self.document.add_heading(
154+
'Test: code block',
155+
level=1
156+
)
157+
158+
html = """<p><code>
159+
This is a code block.
160+
That should be NOT be pre-formatted.
161+
It should NOT retain carriage returns,
162+
163+
or blank lines.
164+
</code></p>"""
165+
self.parser.add_html_to_document(html, self.document)
166+
167+
def test_pre_block(self):
168+
self.document.add_heading(
169+
'Test: pre block',
170+
level=1
171+
)
172+
173+
html = """<pre>
174+
This is a pre-formatted block.
175+
That should be pre-formatted.
176+
Retaining any carriage returns,
177+
178+
and blank lines.
179+
</pre>
180+
"""
181+
self.parser.add_html_to_document(html, self.document)
182+
183+
def test_handling_hr(self):
184+
self.document.add_heading(
185+
'Test: Handling of hr',
186+
level=1
187+
)
188+
self.parser.add_html_to_document("<p>paragraph</p><hr><p>paragraph</p>", self.document)
189+
190+
191+
if __name__ == '__main__':
192+
unittest.main()

0 commit comments

Comments
 (0)