Skip to content

Commit 99549e4

Browse files
author
Isabel Jimenez
committed
Changes to the export/import CSV separator option
1 parent 1de69d1 commit 99549e4

File tree

2 files changed

+77
-99
lines changed

2 files changed

+77
-99
lines changed

src/mongo/tools/export.cpp

Lines changed: 39 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// export.cpp
2+
13
/**
24
* Copyright (C) 2008 10gen Inc.
35
*
@@ -14,19 +16,19 @@
1416
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1517
*/
1618

17-
#include "mongo/pch.h"
19+
#include "pch.h"
20+
#include "db/json.h"
21+
#include "mongo/base/initializer.h"
22+
#include "mongo/client/dbclientcursor.h"
23+
24+
#include "tool.h"
1825

19-
#include <boost/filesystem/convenience.hpp>
20-
#include <boost/filesystem/operations.hpp>
21-
#include <boost/program_options.hpp>
2226
#include <fstream>
2327
#include <iostream>
2428

25-
#include "mongo/base/initializer.h"
26-
#include "mongo/client/dbclientcursor.h"
27-
#include "mongo/db/json.h"
28-
#include "mongo/tools/tool.h"
29-
#include "mongo/util/text.h"
29+
#include <boost/filesystem/convenience.hpp>
30+
#include <boost/filesystem/operations.hpp>
31+
#include <boost/program_options.hpp>
3032

3133
using namespace mongo;
3234

@@ -37,13 +39,15 @@ class Export : public Tool {
3739
Export() : Tool( "export" ) {
3840
addFieldOptions();
3941
add_options()
40-
("query,q" , po::value<string>() , "query filter, as a JSON string" )
41-
("csv","export to csv instead of json")
42-
("out,o", po::value<string>(), "output file; if not specified, stdout is used")
43-
("jsonArray", "output to a json array rather than one object per line")
44-
("slaveOk,k", po::value<bool>()->default_value(true) , "use secondaries for export if available, default true")
45-
("forceTableScan", "force a table scan (do not use $snapshot)" )
46-
;
42+
("query,q" , po::value<string>() , "query filter, as a JSON string" )
43+
("csv","export to csv instead of json")
44+
("out,o", po::value<string>(), "output file; if not specified, stdout is used")
45+
("s", po::value<string>()->default_value(","),"explicit csv separator/delimitator option")
46+
("jsonArray", "output to a json array rather than one object per line")
47+
("slaveOk,k", po::value<bool>()->default_value(true) , "use secondaries for export if available, default true")
48+
("forceTableScan", "force a table scan (do not use $snapshot)" )
49+
50+
;
4751
_usesstdout = false;
4852
}
4953

@@ -164,6 +168,8 @@ class Export : public Tool {
164168
return 1;
165169
}
166170

171+
auth();
172+
167173
if ( hasParam( "fields" ) || csv ) {
168174
needFields();
169175

@@ -199,12 +205,14 @@ class Export : public Tool {
199205
auto_ptr<DBClientCursor> cursor = conn().query( ns.c_str() , q , 0 , 0 , fieldsToReturn , ( slaveOk ? QueryOption_SlaveOk : 0 ) | QueryOption_NoCursorTimeout );
200206

201207
if ( csv ) {
202-
for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
203-
if ( i != _fields.begin() )
204-
out << ",";
205-
out << *i;
206-
}
207-
out << endl;
208+
for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
209+
if ( i != _fields.begin() ){
210+
string sep = getParam("s");
211+
out << sep;
212+
}
213+
out << *i;
214+
}
215+
out << endl;
208216
}
209217

210218
if (jsonArray)
@@ -216,12 +224,14 @@ class Export : public Tool {
216224
BSONObj obj = cursor->next();
217225
if ( csv ) {
218226
for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
219-
if ( i != _fields.begin() )
220-
out << ",";
221-
const BSONElement & e = obj.getFieldDotted(i->c_str());
222-
if ( ! e.eoo() ) {
223-
out << csvString(e);
224-
}
227+
if ( i != _fields.begin() ){
228+
string sep = getParam("s");
229+
out << sep;
230+
}
231+
const BSONElement & e = obj.getFieldDotted(i->c_str());
232+
if ( ! e.eoo() ) {
233+
out << csvString(e);
234+
}
225235
}
226236
out << endl;
227237
}
@@ -245,26 +255,8 @@ class Export : public Tool {
245255
}
246256
};
247257

248-
int toolMain( int argc , char ** argv, char** envp ) {
258+
int main( int argc , char ** argv, char** envp ) {
249259
mongo::runGlobalInitializersOrDie(argc, argv, envp);
250260
Export e;
251261
return e.main( argc , argv );
252262
}
253-
254-
#if defined(_WIN32)
255-
// In Windows, wmain() is an alternate entry point for main(), and receives the same parameters
256-
// as main() but encoded in Windows Unicode (UTF-16); "wide" 16-bit wchar_t characters. The
257-
// WindowsCommandLine object converts these wide character strings to a UTF-8 coded equivalent
258-
// and makes them available through the argv() and envp() members. This enables toolMain()
259-
// to process UTF-8 encoded arguments and environment variables without regard to platform.
260-
int wmain(int argc, wchar_t* argvW[], wchar_t* envpW[]) {
261-
WindowsCommandLine wcl(argc, argvW, envpW);
262-
int exitCode = toolMain(argc, wcl.argv(), wcl.envp());
263-
::_exit(exitCode);
264-
}
265-
#else
266-
int main(int argc, char* argv[], char** envp) {
267-
int exitCode = toolMain(argc, argv, envp);
268-
::_exit(exitCode);
269-
}
270-
#endif

src/mongo/tools/import.cpp

Lines changed: 38 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// import.cpp
2+
13
/**
24
* Copyright (C) 2008 10gen Inc.
35
*
@@ -14,18 +16,16 @@
1416
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1517
*/
1618

17-
#include "mongo/pch.h"
18-
19-
#include <boost/algorithm/string.hpp>
20-
#include <boost/filesystem/operations.hpp>
21-
#include <boost/program_options.hpp>
19+
#include "pch.h"
20+
#include "db/json.h"
21+
#include "tool.h"
22+
#include "../util/text.h"
23+
#include "mongo/base/initializer.h"
2224
#include <fstream>
2325
#include <iostream>
24-
25-
#include "mongo/base/initializer.h"
26-
#include "mongo/db/json.h"
27-
#include "mongo/tools/tool.h"
28-
#include "mongo/util/text.h"
26+
#include <boost/program_options.hpp>
27+
#include <boost/algorithm/string.hpp>
28+
#include <boost/filesystem/operations.hpp>
2929

3030
using namespace mongo;
3131
using std::string;
@@ -267,19 +267,20 @@ class Import : public Tool {
267267
Import() : Tool( "import" ) {
268268
addFieldOptions();
269269
add_options()
270-
("ignoreBlanks","if given, empty fields in csv and tsv will be ignored")
271-
("type",po::value<string>() , "type of file to import. default: json (json,csv,tsv)")
272-
("file",po::value<string>() , "file to import from; if not specified stdin is used" )
273-
("drop", "drop collection first " )
274-
("headerline","first line in input file is a header (CSV and TSV only)")
275-
("upsert", "insert or update objects that already exist" )
276-
("upsertFields", po::value<string>(), "comma-separated fields for the query part of the upsert. You should make sure this is indexed" )
277-
("stopOnError", "stop importing at first error rather than continuing" )
278-
("jsonArray", "load a json array, not one item per line. Currently limited to 16MB." )
279-
;
270+
("ignoreBlanks","if given, empty fields in csv and tsv will be ignored")
271+
("type",po::value<string>() , "type of file to import. default: json (json,csv,tsv)")
272+
("s",po::value<string>(), "separator. defaults: csv :',', tsv:'\t'")
273+
("file",po::value<string>() , "file to import from; if not specified stdin is used" )
274+
("drop", "drop collection first " )
275+
("headerline","first line in input file is a header (CSV and TSV only)")
276+
("upsert", "insert or update objects that already exist" )
277+
("upsertFields", po::value<string>(), "comma-separated fields for the query part of the upsert. You should make sure this is indexed" )
278+
("stopOnError", "stop importing at first error rather than continuing" )
279+
("jsonArray", "load a json array, not one item per line. Currently limited to 16MB." )
280+
;
280281
add_hidden_options()
281-
("noimport", "don't actually import. useful for benchmarking parser" )
282-
;
282+
("noimport", "don't actually import. useful for benchmarking parser" )
283+
;
283284
addPositionArg( "file" , 1 );
284285
_type = JSON;
285286
_ignoreBlanks = false;
@@ -288,7 +289,7 @@ class Import : public Tool {
288289
_doimport = true;
289290
_jsonArray = false;
290291
}
291-
;
292+
;
292293
virtual void printExtraHelp( ostream & out ) {
293294
out << "Import CSV, TSV or JSON data into MongoDB.\n" << endl;
294295
out << "When importing JSON documents, each document must be a separate line of the input file.\n";
@@ -351,6 +352,8 @@ class Import : public Tool {
351352

352353
LOG(1) << "ns: " << ns << endl;
353354

355+
auth();
356+
354357
if ( hasParam( "drop" ) ) {
355358
log() << "dropping: " << ns << endl;
356359
conn().dropCollection( ns.c_str() );
@@ -378,22 +381,23 @@ class Import : public Tool {
378381

379382
if ( hasParam( "type" ) ) {
380383
string type = getParam( "type" );
384+
bool sep = hasParam("s");
381385
if ( type == "json" )
382-
_type = JSON;
383-
else if ( type == "csv" ) {
384-
_type = CSV;
385-
_sep = ",";
386+
_type = JSON;
387+
else if ( type == "csv" ) {
388+
_type = CSV;
389+
_sep = sep ? getParam("s").c_str() : ",";
386390
}
387391
else if ( type == "tsv" ) {
388-
_type = TSV;
389-
_sep = "\t";
392+
_type = TSV;
393+
_sep = sep ? getParam("s").c_str() : "\t";
390394
}
391395
else {
392-
error() << "don't know what type [" << type << "] is" << endl;
393-
return -1;
396+
error() << "don't know what type [" << type << "] is" << endl;
397+
return -1;
394398
}
395399
}
396-
400+
397401
if ( _type == CSV || _type == TSV ) {
398402
_headerLine = hasParam( "headerline" );
399403
if ( _headerLine ) {
@@ -512,28 +516,10 @@ class Import : public Tool {
512516
}
513517
};
514518

515-
const int Import::BUF_SIZE(1024 * 1024 * 16);
516-
517-
int toolMain( int argc , char ** argv, char** envp ) {
519+
int main( int argc , char ** argv, char** envp ) {
518520
mongo::runGlobalInitializersOrDie(argc, argv, envp);
519521
Import import;
520522
return import.main( argc , argv );
521523
}
522524

523-
#if defined(_WIN32)
524-
// In Windows, wmain() is an alternate entry point for main(), and receives the same parameters
525-
// as main() but encoded in Windows Unicode (UTF-16); "wide" 16-bit wchar_t characters. The
526-
// WindowsCommandLine object converts these wide character strings to a UTF-8 coded equivalent
527-
// and makes them available through the argv() and envp() members. This enables toolMain()
528-
// to process UTF-8 encoded arguments and environment variables without regard to platform.
529-
int wmain(int argc, wchar_t* argvW[], wchar_t* envpW[]) {
530-
WindowsCommandLine wcl(argc, argvW, envpW);
531-
int exitCode = toolMain(argc, wcl.argv(), wcl.envp());
532-
::_exit(exitCode);
533-
}
534-
#else
535-
int main(int argc, char* argv[], char** envp) {
536-
int exitCode = toolMain(argc, argv, envp);
537-
::_exit(exitCode);
538-
}
539-
#endif
525+
const int Import::BUF_SIZE(1024 * 1024 * 16);

0 commit comments

Comments
 (0)