File tree Expand file tree Collapse file tree 1 file changed +25
-1
lines changed Expand file tree Collapse file tree 1 file changed +25
-1
lines changed Original file line number Diff line number Diff line change 1- URL = "http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz"
"""Script to download the 20 newsgroups text classification set.

Running the script fetches the ``20news-bydate`` archive into the current
working directory (unless an archive with that name is already there),
unpacks it in place and then deletes the archive.  Nothing is done when both
the train and the test folders already exist.
"""

import contextlib
import os
import shutil
import tarfile
import urllib

try:
    # Python 3 moved urlopen into the urllib.request submodule.
    from urllib.request import urlopen
except ImportError:
    # Python 2: urlopen is a top-level function of urllib.
    urlopen = urllib.urlopen

URL = ("http://people.csail.mit.edu/jrennie/"
       "20Newsgroups/20news-bydate.tar.gz")

ARCHIVE_NAME = "20news-bydate.tar.gz"
TRAIN_FOLDER = "20news-bydate-train"
TEST_FOLDER = "20news-bydate-test"


if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER):

    if not os.path.exists(ARCHIVE_NAME):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Downloading dataset from %s (14 MB)" % URL)

        # Stream into a temporary name and rename only once the transfer has
        # finished, so an interrupted download is never mistaken for a
        # complete archive on the next run.
        partial_name = ARCHIVE_NAME + ".part"
        with contextlib.closing(urlopen(URL)) as response:
            with open(partial_name, "wb") as partial_file:
                # Copy in chunks instead of holding the whole archive in
                # memory.
                shutil.copyfileobj(response, partial_file)
        os.rename(partial_name, ARCHIVE_NAME)

    print("Decompressing %s" % ARCHIVE_NAME)
    # NOTE(review): the archive comes over plain HTTP and extractall() trusts
    # the member paths stored in it; consider validating members (or using an
    # extraction filter where the running Python provides one).
    # closing() guarantees the archive handle is released before os.remove(),
    # which matters on platforms that refuse to delete open files.
    with contextlib.closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive:
        archive.extractall(path=".")
    os.remove(ARCHIVE_NAME)

You can’t perform that action at this time.
0 commit comments