Skip to content

Commit e6bd5ab

Browse files
authored
Merge pull request pqzx#16 from jongracecox/jongracecox/add-table-styles
Add support for styled tables
2 parents de4fe3e + 1533307 commit e6bd5ab

File tree

3 files changed

+81
-6
lines changed

3 files changed

+81
-6
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,23 @@ from htmldocx import HtmlToDocx
4343
new_parser = HtmlToDocx()
4444
docx = new_parser.parse_html_string(input_html_file_string)
4545
```
46+
47+
Change table styles
48+
49+
Tables are not styled by default. Use the `table_style` attribute on the parser to set a table
50+
style. The same style is applied to every table the parser creates, including tables nested inside table cells.
51+
52+
```
53+
from htmldocx import HtmlToDocx
54+
55+
new_parser = HtmlToDocx()
56+
new_parser.table_style = 'Light Shading Accent 4'
57+
```
58+
59+
To add borders to tables, use the `TableGrid` style (this is the style ID of the built-in "Table Grid" style in python-docx's default template):
60+
61+
```
62+
new_parser.table_style = 'TableGrid'
63+
```
64+
65+
The table styles available in the default python-docx template are listed here: https://python-docx.readthedocs.io/en/latest/user/styles-understanding.html#table-styles-in-default-template

htmldocx/h2d.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
LIST_INDENT = 0.5
3333
MAX_INDENT = 5.5 # To stop indents going off the page
3434

35+
# Style to use with tables. By default no style is used.
36+
DEFAULT_TABLE_STYLE = None
37+
3538
def get_filename_from_url(url):
3639
return os.path.basename(urlparse(url).path)
3740

@@ -103,6 +106,7 @@ def __init__(self):
103106
'table > tbody > tr',
104107
'table > tfoot > tr'
105108
]
109+
self.table_style = DEFAULT_TABLE_STYLE
106110

107111
def set_initial_attrs(self, document=None):
108112
self.tags = {
@@ -123,6 +127,10 @@ def set_initial_attrs(self, document=None):
123127
self.skip_tag = None
124128
self.instances_to_skip = 0
125129

130+
def copy_settings_from(self, other):
131+
"""Copy settings from another instance of HtmlToDocx"""
132+
self.table_style = other.table_style
133+
126134
def get_cell_html(self, soup):
127135
# Returns string of td element with opening and closing <td> tags removed
128136
# Cannot use find_all as it only finds element tags and does not find text which
@@ -244,6 +252,13 @@ def handle_table(self):
244252
table_soup = self.tables[self.table_no]
245253
rows, cols = self.get_table_dimensions(table_soup)
246254
self.table = self.doc.add_table(rows, cols)
255+
256+
if self.table_style:
257+
try:
258+
self.table.style = self.table_style
259+
except KeyError as e:
260+
raise ValueError(f"Unable to apply style {self.table_style}.") from e
261+
247262
rows = self.get_table_rows(table_soup)
248263
cell_row = 0
249264
for row in rows:
@@ -255,6 +270,7 @@ def handle_table(self):
255270
cell_html = "<b>%s</b>" % cell_html
256271
docx_cell = self.table.cell(cell_row, cell_col)
257272
child_parser = HtmlToDocx()
273+
child_parser.copy_settings_from(self)
258274
child_parser.add_html_to_cell(cell_html, docx_cell)
259275
cell_col += 1
260276
cell_row += 1

tests/test.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
11
import os
2+
from pathlib import Path
23
import unittest
34
from docx import Document
45
from .context import HtmlToDocx, test_dir
56

67
class OutputTest(unittest.TestCase):
78

9+
@staticmethod
10+
def get_html_from_file(filename: str):
11+
file_path = Path(test_dir) / Path(filename)
12+
with open(file_path, 'r') as f:
13+
html = f.read()
14+
return html
15+
816
@classmethod
917
def setUpClass(cls):
1018
cls.document = Document()
11-
textpath = os.path.join(test_dir, 'text1.html')
12-
tablepath = os.path.join(test_dir, 'tables1.html')
13-
with open(textpath, 'r') as tb:
14-
cls.text1 = tb.read()
15-
with open(tablepath, 'r') as tb:
16-
cls.table_html = tb.read()
19+
cls.text1 = cls.get_html_from_file('text1.html')
20+
cls.table_html = cls.get_html_from_file('tables1.html')
21+
cls.table2_html = cls.get_html_from_file('tables2.html')
1722

1823
@classmethod
1924
def tearDownClass(cls):
@@ -54,6 +59,40 @@ def test_add_html_with_tables(self):
5459
)
5560
self.parser.add_html_to_document(self.table_html, self.document)
5661

62+
def test_add_html_with_tables_accent_style(self):
63+
self.document.add_heading(
64+
'Test: add html with tables with accent',
65+
)
66+
self.parser.table_style = 'Light Grid Accent 6'
67+
self.parser.add_html_to_document(self.table_html, self.document)
68+
69+
def test_add_html_with_tables_basic_style(self):
70+
self.document.add_heading(
71+
'Test: add html with tables with basic style',
72+
)
73+
self.parser.table_style = 'TableGrid'
74+
self.parser.add_html_to_document(self.table_html, self.document)
75+
76+
def test_add_nested_tables(self):
77+
self.document.add_heading(
78+
'Test: add nested tables',
79+
)
80+
self.parser.add_html_to_document(self.table2_html, self.document)
81+
82+
def test_add_nested_tables_basic_style(self):
83+
self.document.add_heading(
84+
'Test: add nested tables with basic style',
85+
)
86+
self.parser.table_style = 'TableGrid'
87+
self.parser.add_html_to_document(self.table2_html, self.document)
88+
89+
def test_add_nested_tables_accent_style(self):
90+
self.document.add_heading(
91+
'Test: add nested tables with accent style',
92+
)
93+
self.parser.table_style = 'Light Grid Accent 6'
94+
self.parser.add_html_to_document(self.table2_html, self.document)
95+
5796
def test_add_html_skip_tables(self):
5897
# broken until feature readded
5998
self.document.add_heading(

0 commit comments

Comments
 (0)