11#!/usr/bin/env python
22
33import sys
4+ from os .path import splitext
45
56import agate
67import agatedbf # noqa
@@ -42,11 +43,28 @@ def option_parser(bytestring):
4243 help = 'Display sheet names from the input Excel file.' )
4344 self .argparser .add_argument ('--sheet' , dest = 'sheet' , type = option_parser ,
4445 help = 'The name of the Excel sheet to operate on.' )
46+ self .argparser .add_argument ('--write-sheets' , dest = 'write_sheets' , type = option_parser ,
47+ help = 'The names of the Excel sheets to write to files, or "-" to write all sheets.' )
4548 self .argparser .add_argument ('-y' , '--snifflimit' , dest = 'sniff_limit' , type = int ,
4649 help = 'Limit CSV dialect sniffing to the specified number of bytes. Specify "0" to disable sniffing entirely.' )
4750 self .argparser .add_argument ('-I' , '--no-inference' , dest = 'no_inference' , action = 'store_true' ,
4851 help = 'Disable type inference (and --locale, --date-format, --datetime-format) when parsing CSV input.' )
4952
def open_excel_input_file(self, path):
    """
    Return a binary file-like object for the Excel workbook at ``path``.

    :param path: Filesystem path of the workbook. An empty value or ``-``
        selects standard input.
    :returns: The file opened in ``'rb'`` mode for a real path; otherwise an
        in-memory ``six.BytesIO`` holding everything read from standard input.
    """
    reading_stdin = not path or path == '-'
    if not reading_stdin:
        return open(path, 'rb')

    # Python 2 reads from sys.stdin directly; otherwise the bytes come from
    # the underlying sys.stdin.buffer.
    stdin_bytes = sys.stdin.read() if six.PY2 else sys.stdin.buffer.read()
    return six.BytesIO(stdin_bytes)
61+
def sheet_names(self, filetype):
    """
    Return the sheet names of the Excel workbook held by ``self.input_file``.

    :param filetype: The input format. Only ``'xls'`` and ``'xlsx'`` are
        handled.
    :returns: The workbook's sheet names, or ``None`` when ``filetype`` is not
        an Excel format.
    """
    if filetype == 'xls':
        # xlrd is given the whole workbook as bytes, which leaves the stream
        # at EOF. Rewind it so that a later reader of the same file object
        # does not see an empty workbook.
        contents = self.input_file.read()
        try:
            self.input_file.seek(0)
        except (IOError, OSError):
            # Non-seekable input (e.g. a pipe): there is nothing to rewind,
            # so fall back to the previous behaviour.
            pass
        return xlrd.open_workbook(file_contents=contents).sheet_names()

    if filetype == 'xlsx':
        return openpyxl.load_workbook(self.input_file, read_only=True, data_only=True).sheetnames

    return None
67+
5068 def main (self ):
5169 path = self .args .input_path
5270
@@ -71,25 +89,15 @@ def main(self):
7189
7290 # Set the input file.
7391 if filetype in ('xls' , 'xlsx' ):
74- if not path or path == '-' :
75- if six .PY2 :
76- self .input_file = six .BytesIO (sys .stdin .read ())
77- else :
78- self .input_file = six .BytesIO (sys .stdin .buffer .read ())
79- else :
80- self .input_file = open (path , 'rb' )
92+ self .input_file = self .open_excel_input_file (path )
8193 else :
8294 self .input_file = self ._open_input_file (path )
8395
8496 if self .args .names_only :
85- sheet_names = None
86- if filetype == 'xls' :
87- sheet_names = xlrd .open_workbook (file_contents = self .input_file .read ()).sheet_names ()
88- elif filetype == 'xlsx' :
89- sheet_names = openpyxl .load_workbook (self .input_file , read_only = True , data_only = True ).sheetnames
90- if sheet_names :
91- for name in sheet_names :
92- self .output_file .write ('%s\n ' % name )
97+ sheets = self .sheet_names (filetype )
98+ if sheets :
99+ for sheet in sheets :
100+ self .output_file .write ('%s\n ' % sheet )
93101 else :
94102 self .argparser .error ('You cannot use the -n or --names options with non-Excel files.' )
95103 self .input_file .close ()
@@ -103,9 +111,6 @@ def main(self):
103111 elif filetype == 'fixed' :
104112 raise ValueError ('schema must not be null when format is "fixed"' )
105113
106- if self .args .sheet :
107- kwargs ['sheet' ] = self .args .sheet
108-
109114 if filetype == 'csv' :
110115 kwargs .update (self .reader_kwargs )
111116 kwargs ['sniff_limit' ] = self .args .sniff_limit
@@ -133,15 +138,36 @@ def main(self):
133138 elif filetype == 'ndjson' :
134139 table = agate .Table .from_json (self .input_file , key = self .args .key , newline = True , ** kwargs )
135140 elif filetype == 'xls' :
136- table = agate .Table .from_xls (self .input_file , ** kwargs )
141+ table = agate .Table .from_xls (self .input_file , sheet = self . args . sheet , ** kwargs )
137142 elif filetype == 'xlsx' :
138- table = agate .Table .from_xlsx (self .input_file , ** kwargs )
143+ table = agate .Table .from_xlsx (self .input_file , sheet = self . args . sheet , ** kwargs )
139144 elif filetype == 'dbf' :
140145 if not hasattr (self .input_file , 'name' ):
141146 raise ValueError ('DBF files can not be converted from stdin. You must pass a filename.' )
142147 table = agate .Table .from_dbf (self .input_file .name , ** kwargs )
143148 table .to_csv (self .output_file )
144149
150+ if self .args .write_sheets :
151+ # Close and re-open the file, as the file object has been mutated or closed.
152+ self .input_file .close ()
153+
154+ self .input_file = self .open_excel_input_file (path )
155+
156+ if self .args .write_sheets == '-' :
157+ sheets = self .sheet_names (filetype )
158+ else :
159+ sheets = [int (sheet ) if sheet .isdigit () else sheet for sheet in self .args .write_sheets .split (',' )]
160+
161+ if filetype == 'xls' :
162+ tables = agate .Table .from_xls (self .input_file , sheet = sheets , ** kwargs )
163+ elif filetype == 'xlsx' :
164+ tables = agate .Table .from_xlsx (self .input_file , sheet = sheets , ** kwargs )
165+
166+ base = splitext (self .input_file .name )[0 ]
167+ for i , table in enumerate (tables .values ()):
168+ with open ('%s_%d.csv' % (base , i ), 'w' ) as f :
169+ table .to_csv (f )
170+
145171 self .input_file .close ()
146172
147173 if self .args .schema :
0 commit comments