From 1ff4043c2fd40e90e71df738e585a5e81b687e7f Mon Sep 17 00:00:00 2001
From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com>
Date: Tue, 22 Dec 2020 23:31:45 +1100
Subject: [PATCH] [*.py] Rename "Arguments:" to "Args:"
---
.../text_classification/iterable_train.py | 8 ++++----
examples/text_classification/model.py | 2 +-
examples/text_classification/predict.py | 2 +-
examples/text_classification/train.py | 4 ++--
torchtext/data/dataset.py | 14 ++++++-------
torchtext/data/field.py | 16 +++++++--------
torchtext/data/functional.py | 10 +++++-----
torchtext/data/iterator.py | 2 +-
torchtext/data/metrics.py | 4 ++--
torchtext/data/pipeline.py | 10 +++++-----
torchtext/data/utils.py | 4 ++--
torchtext/datasets/imdb.py | 6 +++---
torchtext/datasets/language_modeling.py | 14 ++++++-------
torchtext/datasets/nli.py | 4 ++--
torchtext/datasets/sst.py | 6 +++---
torchtext/datasets/text_classification.py | 18 ++++++++---------
torchtext/datasets/translation.py | 10 +++++-----
torchtext/datasets/trec.py | 6 +++---
torchtext/datasets/unsupervised_learning.py | 2 +-
.../datasets/language_modeling.py | 10 +++++-----
.../experimental/datasets/question_answer.py | 6 +++---
.../datasets/raw/language_modeling.py | 8 ++++----
.../datasets/raw/question_answer.py | 4 ++--
.../datasets/raw/sequence_tagging.py | 4 ++--
.../datasets/raw/text_classification.py | 18 ++++++++---------
.../experimental/datasets/raw/translation.py | 6 +++---
.../experimental/datasets/sequence_tagging.py | 6 +++---
.../datasets/text_classification.py | 20 +++++++++----------
.../experimental/datasets/translation.py | 8 ++++----
torchtext/experimental/transforms.py | 2 +-
torchtext/experimental/vectors.py | 2 +-
torchtext/experimental/vocab.py | 6 +++---
torchtext/utils.py | 6 +++---
torchtext/vocab.py | 14 ++++++-------
34 files changed, 131 insertions(+), 131 deletions(-)
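Note (placed below the "---" cut, so `git am` leaves it out of the commit message): the patch only renames the docstring section header. Sphinx's napoleon extension recognizes both "Arguments:" and "Args:", and "Args:" is the spelling used by the Google Python style guide, hence the shorter form. A minimal sketch of the resulting docstring shape, using a hypothetical helper that is not part of this patch:

    def load_lines(path, encoding="utf-8"):
        """Read a text file and return its lines.

        Args:
            path: Path to the data file.
            encoding: File encoding. Default: "utf-8".

        Returns:
            A list of lines with trailing newlines stripped.
        """
        # Illustrative only; not part of the torchtext sources touched above.
        with open(path, encoding=encoding) as f:
            return [line.rstrip("\n") for line in f]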
diff --git a/examples/text_classification/iterable_train.py b/examples/text_classification/iterable_train.py
index d4e6507bc5..94b3c1c3a3 100644
--- a/examples/text_classification/iterable_train.py
+++ b/examples/text_classification/iterable_train.py
@@ -60,7 +60,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_):
r"""
Here we use SGD optimizer to train the model.
- Arguments:
+ Args:
lr_: learning rate
num_epoch: the number of epoches for training the model
train_data_: the data used to train the model
@@ -108,7 +108,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_):
def test(data_):
r"""
- Arguments:
+ Args:
data_: the data used to train the model
"""
data = DataLoader(
@@ -137,7 +137,7 @@ def get_csv_iterator(data_path, ngrams, vocab, start=0, num_lines=None):
Generate an iterator to read CSV file.
The yield values are an integer for the label and a tensor for the text part.
- Arguments:
+ Args:
data_path: a path for the data file.
ngrams: the number used for ngrams.
vocab: a vocab object saving the string-to-index information
@@ -171,7 +171,7 @@ class Dataset(torch.utils.data.IterableDataset):
An iterable dataset to save the data. This dataset supports multi-processing
to load the data.
- Arguments:
+ Args:
iterator: the iterator to read data.
num_lines: the number of lines read by the individual iterator.
"""
diff --git a/examples/text_classification/model.py b/examples/text_classification/model.py
index e96d1d0125..1314b1b55a 100644
--- a/examples/text_classification/model.py
+++ b/examples/text_classification/model.py
@@ -31,7 +31,7 @@ def init_weights(self):
def forward(self, text, offsets):
r"""
- Arguments:
+ Args:
text: 1-D tensor representing a bag of text tensors
offsets: a list of offsets to delimit the 1-D text tensor
into the individual sequences.
diff --git a/examples/text_classification/predict.py b/examples/text_classification/predict.py
index b11de131a9..4bdd1f7fce 100644
--- a/examples/text_classification/predict.py
+++ b/examples/text_classification/predict.py
@@ -11,7 +11,7 @@ def predict(text, model, dictionary, ngrams):
The input text is numericalized with the vocab and then sent to
the model for inference.
- Arguments:
+ Args:
text: a sample text string
model: the trained model
dictionary: a vocab object for the information of string-to-index
diff --git a/examples/text_classification/train.py b/examples/text_classification/train.py
index fbd0831832..84f20fb0d4 100644
--- a/examples/text_classification/train.py
+++ b/examples/text_classification/train.py
@@ -56,7 +56,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_):
We use a SGD optimizer to train the model here and the learning rate
decreases linearly with the progress of the training process.
- Arguments:
+ Args:
lr_: learning rate
sub_train_: the data used to train the model
sub_valid_: the data used for validation
@@ -94,7 +94,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_):
def test(data_):
r"""
- Arguments:
+ Args:
data_: the data used to train the model
"""
data = DataLoader(data_, batch_size=batch_size, collate_fn=generate_batch)
diff --git a/torchtext/data/dataset.py b/torchtext/data/dataset.py
index c9efe9168b..eecfc49d9c 100644
--- a/torchtext/data/dataset.py
+++ b/torchtext/data/dataset.py
@@ -29,7 +29,7 @@ class Dataset(torch.utils.data.Dataset):
def __init__(self, examples, fields, filter_pred=None):
"""Create a dataset from a list of Examples and Fields.
- Arguments:
+ Args:
examples: List of Examples.
fields (List(tuple(str, Field))): The Fields to use in this tuple. The
string is a field name, and the Field is the associated field.
@@ -55,7 +55,7 @@ def splits(cls, path=None, root='.data', train=None, validation=None,
test=None, **kwargs):
"""Create Dataset objects for multiple splits of a dataset.
- Arguments:
+ Args:
path (str): Common prefix of the splits' file paths, or None to use
the result of cls.download(root).
root (str): Root dataset storage directory. Default is '.data'.
@@ -87,7 +87,7 @@ def split(self, split_ratio=0.7, stratified=False, strata_field='label',
random_state=None):
"""Create train-test(-valid?) splits from the instance's examples.
- Arguments:
+ Args:
split_ratio (float or List of floats): a number [0, 1] denoting the amount
of data to be used for the training split (rest is used for test),
or a list of numbers denoting the relative sizes of train, test and valid
@@ -157,7 +157,7 @@ def __getattr__(self, attr):
def download(cls, root, check=None):
"""Download and unzip an online archive (.zip, .gz, or .tgz).
- Arguments:
+ Args:
root (str): Folder to download data to.
check (str or None): Folder whose existence indicates
that the dataset has already been downloaded, or
@@ -201,7 +201,7 @@ def download(cls, root, check=None):
def filter_examples(self, field_names):
"""Remove unknown words from dataset examples with respect to given field.
- Arguments:
+ Args:
field_names (list(str)): Within example only the parts with field names in
field_names will have their unknown words deleted.
"""
@@ -221,7 +221,7 @@ def __init__(self, path, format, fields, skip_header=False,
csv_reader_params={}, **kwargs):
"""Create a TabularDataset given a path, file format, and field list.
- Arguments:
+ Args:
path (str): Path to the data file.
format (str): The format of the data file. One of "CSV", "TSV", or
"JSON" (case-insensitive).
@@ -325,7 +325,7 @@ def stratify(examples, strata_field):
def rationed_split(examples, train_ratio, test_ratio, val_ratio, rnd):
"""Create a random permutation of examples, then split them by ratios
- Arguments:
+ Args:
examples: a list of data
train_ratio, test_ratio, val_ratio: split fractions.
rnd: a random shuffler
diff --git a/torchtext/data/field.py b/torchtext/data/field.py
index e117b2edb7..95be1b85f2 100644
--- a/torchtext/data/field.py
+++ b/torchtext/data/field.py
@@ -274,7 +274,7 @@ def pad(self, minibatch):
def build_vocab(self, *args, **kwargs):
"""Construct the Vocab object for this field from one or more datasets.
- Arguments:
+ Args:
Positional arguments: Dataset objects or other iterable data
sources from which to construct the Vocab object that
represents the set of possible values for this field. If
@@ -311,7 +311,7 @@ def numericalize(self, arr, device=None):
If the field has include_lengths=True, a tensor of lengths will be
included in the return value.
- Arguments:
+ Args:
arr (List[List[str]], or tuple of (List[List[str]], List[int])):
List of tokenized and padded examples, or tuple of List of
tokenized and padded examples and List of lengths of each
@@ -423,7 +423,7 @@ def __init__(self, **kwargs):
def segment(self, *args):
"""Segment one or more datasets with this subword field.
- Arguments:
+ Args:
Positional arguments: Dataset objects or other indexable
mutable sequences to segment. If a Dataset object is provided,
all columns corresponding to this field are used; individual
@@ -455,7 +455,7 @@ class NestedField(Field):
primarily used to implement character embeddings. See ``tests/data/test_field.py``
for examples on how to use this field.
- Arguments:
+ Args:
nesting_field (Field): A field contained in this nested field.
use_vocab (bool): Whether to use a Vocab object. If False, the data in this
field should already be numerical. Default: ``True``.
@@ -533,7 +533,7 @@ def preprocess(self, xs):
the list is preprocessed using ``self.nesting_field.preprocess`` and the resulting
list is returned.
- Arguments:
+ Args:
xs (list or str): The input to preprocess.
Returns:
@@ -576,7 +576,7 @@ def pad(self, minibatch):
['', '', '', '', '', '', ''],
['', '', '', '', '', '', '']]]
- Arguments:
+ Args:
minibatch (list): Each element is a list of string if
``self.nesting_field.sequential`` is ``False``, a list of list of string
otherwise.
@@ -646,7 +646,7 @@ def pad(self, minibatch):
def build_vocab(self, *args, **kwargs):
"""Construct the Vocab object for nesting field and combine it with this field's vocab.
- Arguments:
+ Args:
Positional arguments: Dataset objects or other iterable data
sources from which to construct the Vocab object that
represents the set of possible values for the nesting field. If
@@ -697,7 +697,7 @@ def numericalize(self, arrs, device=None):
Each item in the minibatch will be numericalized independently and the resulting
tensors will be stacked at the first dimension.
- Arguments:
+ Args:
arr (List[List[str]]): List of tokenized and padded examples.
device (str or torch.device): A string or instance of `torch.device`
specifying which device the Variables are going to be created on.
diff --git a/torchtext/data/functional.py b/torchtext/data/functional.py
index 6e20c8e667..995025e929 100644
--- a/torchtext/data/functional.py
+++ b/torchtext/data/functional.py
@@ -21,7 +21,7 @@ def generate_sp_model(filename, vocab_size=20000,
model_prefix='m_user'):
r"""Train a SentencePiece tokenizer.
- Arguments:
+ Args:
filename: the data file for training SentencePiece model.
vocab_size: the size of vocabulary (Default: 20,000).
model_type: the type of SentencePiece model, including unigram,
@@ -42,7 +42,7 @@ def generate_sp_model(filename, vocab_size=20000,
def load_sp_model(spm):
r"""Load a sentencepiece model for file.
- Arguments:
+ Args:
spm: the file path or a file object saving the sentencepiece model.
Outputs:
@@ -70,7 +70,7 @@ def sentencepiece_numericalizer(sp_model):
r"""A sentencepiece model to numericalize a text sentence into
a generator over the ids.
- Arguments:
+ Args:
sp_model: a SentencePiece model.
Outputs:
@@ -96,7 +96,7 @@ def sentencepiece_tokenizer(sp_model):
r"""A sentencepiece model to tokenize a text sentence into
a generator over the tokens.
- Arguments:
+ Args:
sp_model: a SentencePiece model.
Outputs:
@@ -157,7 +157,7 @@ def simple_space_split(iterator):
def numericalize_tokens_from_iterator(vocab, iterator, removed_tokens=None):
r"""Yield a list of ids from an token iterator with a vocab.
- Arguments:
+ Args:
vocab: the vocabulary convert token into id.
iterator: the iterator yield a list of tokens.
removed_tokens: removed tokens from output dataset (Default: None)
diff --git a/torchtext/data/iterator.py b/torchtext/data/iterator.py
index 4c1119cb8c..3dfa807138 100644
--- a/torchtext/data/iterator.py
+++ b/torchtext/data/iterator.py
@@ -85,7 +85,7 @@ def __init__(self, dataset, batch_size, sort_key=None, device=None,
def splits(cls, datasets, batch_sizes=None, **kwargs):
"""Create Iterator objects for multiple splits of a dataset.
- Arguments:
+ Args:
datasets: Tuple of Dataset objects corresponding to the splits. The
first such object should be the train set.
batch_sizes: Tuple of batch sizes to use for the different splits,
diff --git a/torchtext/data/metrics.py b/torchtext/data/metrics.py
index 63e07cdb21..c5c2983ee4 100644
--- a/torchtext/data/metrics.py
+++ b/torchtext/data/metrics.py
@@ -7,7 +7,7 @@
def _compute_ngram_counter(tokens, max_n):
""" Create a Counter with a count of unique n-grams in the tokens list
- Arguments:
+ Args:
tokens: a list of tokens (typically a string split on whitespaces)
max_n: the maximum order of n-gram wanted
@@ -36,7 +36,7 @@ def bleu_score(candidate_corpus, references_corpus, max_n=4, weights=[0.25] * 4)
"""Computes the BLEU score between a candidate translation corpus and a references
translation corpus. Based on https://www.aclweb.org/anthology/P02-1040.pdf
- Arguments:
+ Args:
candidate_corpus: an iterable of candidate translations. Each translation is an
iterable of tokens
references_corpus: an iterable of iterables of reference translations. Each
diff --git a/torchtext/data/pipeline.py b/torchtext/data/pipeline.py
index f576fdc720..d72ef5ef4c 100644
--- a/torchtext/data/pipeline.py
+++ b/torchtext/data/pipeline.py
@@ -12,7 +12,7 @@ class Pipeline(object):
def __init__(self, convert_token=None):
"""Create a pipeline.
- Arguments:
+ Args:
convert_token: The function to apply to input sequence data.
If None, the identity function is used. Default: None
"""
@@ -28,7 +28,7 @@ def __init__(self, convert_token=None):
def __call__(self, x, *args):
"""Apply the the current Pipeline(s) to an input.
- Arguments:
+ Args:
x: The input to process with the Pipeline(s).
Positional arguments: Forwarded to the `call` function
of the Pipeline(s).
@@ -43,7 +43,7 @@ def call(self, x, *args):
applying the `convert_token` function to all input elements is
returned.
- Arguments:
+ Args:
x: The input to apply the convert_token function to.
Positional arguments: Forwarded to the `convert_token` function
of the current Pipeline.
@@ -55,7 +55,7 @@ def call(self, x, *args):
def add_before(self, pipeline):
"""Add a Pipeline to be applied before this processing pipeline.
- Arguments:
+ Args:
pipeline: The Pipeline or callable to apply before this
Pipeline.
"""
@@ -67,7 +67,7 @@ def add_before(self, pipeline):
def add_after(self, pipeline):
"""Add a Pipeline to be applied after this processing pipeline.
- Arguments:
+ Args:
pipeline: The Pipeline or callable to apply after this
Pipeline.
"""
diff --git a/torchtext/data/utils.py b/torchtext/data/utils.py
index 5ecfad1958..045c2646fb 100644
--- a/torchtext/data/utils.py
+++ b/torchtext/data/utils.py
@@ -76,7 +76,7 @@ def get_tokenizer(tokenizer, language='en'):
r"""
Generate tokenizer function for a string sentence.
- Arguments:
+ Args:
tokenizer: the name of tokenizer function. If None, it returns split()
function, which splits the string sentence by space.
If basic_english, it returns _basic_english_normalize() function,
@@ -205,7 +205,7 @@ def dtype_to_attr(dtype):
def ngrams_iterator(token_list, ngrams):
"""Return an iterator that yields the given tokens and their ngrams.
- Arguments:
+ Args:
token_list: A list of tokens
ngrams: the number of ngrams.
diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py
index 38fccb97be..e59ce19ecb 100644
--- a/torchtext/datasets/imdb.py
+++ b/torchtext/datasets/imdb.py
@@ -18,7 +18,7 @@ def sort_key(ex):
def __init__(self, path, text_field, label_field, **kwargs):
"""Create an IMDB dataset instance given a path and fields.
- Arguments:
+ Args:
path: Path to the dataset's highest level directory
text_field: The field that will be used for text data.
label_field: The field that will be used for label data.
@@ -41,7 +41,7 @@ def splits(cls, text_field, label_field, root='.data',
train='train', test='test', **kwargs):
"""Create dataset objects for splits of the IMDB dataset.
- Arguments:
+ Args:
text_field: The field that will be used for the sentence.
label_field: The field that will be used for label data.
root: Root dataset storage directory. Default is '.data'.
@@ -58,7 +58,7 @@ def splits(cls, text_field, label_field, root='.data',
def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs):
"""Create iterator objects for splits of the IMDB dataset.
- Arguments:
+ Args:
batch_size: Batch_size
device: Device to create batches on. Use - 1 for CPU and None for
the currently active GPU device.
diff --git a/torchtext/datasets/language_modeling.py b/torchtext/datasets/language_modeling.py
index 0002aabc04..7ebcca71b1 100644
--- a/torchtext/datasets/language_modeling.py
+++ b/torchtext/datasets/language_modeling.py
@@ -9,7 +9,7 @@ def __init__(self, path, text_field, newline_eos=True,
encoding='utf-8', **kwargs):
"""Create a LanguageModelingDataset given a path and a field.
- Arguments:
+ Args:
path: Path to the data file.
text_field: The field that will be used for text data.
newline_eos: Whether to add an <eos> token for every newline in the
@@ -44,7 +44,7 @@ def splits(cls, text_field, root='.data', train='wiki.train.tokens',
This is the most flexible way to use the dataset.
- Arguments:
+ Args:
text_field: The field that will be used for text data.
root: The root directory that the dataset's zip archive will be
expanded into; therefore the directory in whose wikitext-2
@@ -67,7 +67,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data',
This is the simplest way to use the dataset, and assumes common
defaults for field, vocabulary, and iterator parameters.
- Arguments:
+ Args:
batch_size: Batch size.
bptt_len: Length of sequences for backpropagation through time.
device: Device to create batches on. Use -1 for CPU and None for
@@ -105,7 +105,7 @@ def splits(cls, text_field, root='.data', train='wiki.train.tokens',
This is the most flexible way to use the dataset.
- Arguments:
+ Args:
text_field: The field that will be used for text data.
root: The root directory that the dataset's zip archive will be
expanded into; therefore the directory in whose wikitext-103
@@ -128,7 +128,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data',
This is the simplest way to use the dataset, and assumes common
defaults for field, vocabulary, and iterator parameters.
- Arguments:
+ Args:
batch_size: Batch size.
bptt_len: Length of sequences for backpropagation through time.
device: Device to create batches on. Use -1 for CPU and None for
@@ -174,7 +174,7 @@ def splits(cls, text_field, root='.data', train='ptb.train.txt',
**kwargs):
"""Create dataset objects for splits of the Penn Treebank dataset.
- Arguments:
+ Args:
text_field: The field that will be used for text data.
root: The root directory where the data files will be stored.
train: The filename of the train data. Default: 'ptb.train.txt'.
@@ -195,7 +195,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data',
This is the simplest way to use the dataset, and assumes common
defaults for field, vocabulary, and iterator parameters.
- Arguments:
+ Args:
batch_size: Batch size.
bptt_len: Length of sequences for backpropagation through time.
device: Device to create batches on. Use -1 for CPU and None for
diff --git a/torchtext/datasets/nli.py b/torchtext/datasets/nli.py
index 9b3d758b06..758576e7cc 100644
--- a/torchtext/datasets/nli.py
+++ b/torchtext/datasets/nli.py
@@ -51,7 +51,7 @@ def splits(cls, text_field, label_field, parse_field=None,
This is the most flexible way to use the dataset.
- Arguments:
+ Args:
text_field: The field that will be used for premise and hypothesis
data.
label_field: The field that will be used for label data.
@@ -96,7 +96,7 @@ def iters(cls, batch_size=32, device=0, root='.data',
This is the simplest way to use the dataset, and assumes common
defaults for field, vocabulary, and iterator parameters.
- Arguments:
+ Args:
batch_size: Batch size.
device: Device to create batches on. Use -1 for CPU and None for
the currently active GPU device.
diff --git a/torchtext/datasets/sst.py b/torchtext/datasets/sst.py
index 95a04e3f6b..8c793fad93 100644
--- a/torchtext/datasets/sst.py
+++ b/torchtext/datasets/sst.py
@@ -17,7 +17,7 @@ def __init__(self, path, text_field, label_field, subtrees=False,
fine_grained=False, **kwargs):
"""Create an SST dataset instance given a path and fields.
- Arguments:
+ Args:
path: Path to the data file
text_field: The field that will be used for text data.
label_field: The field that will be used for label data.
@@ -49,7 +49,7 @@ def splits(cls, text_field, label_field, root='.data',
train_subtrees=False, **kwargs):
"""Create dataset objects for splits of the SST dataset.
- Arguments:
+ Args:
text_field: The field that will be used for the sentence.
label_field: The field that will be used for label data.
root: The root directory that the dataset's zip archive will be
@@ -81,7 +81,7 @@ def splits(cls, text_field, label_field, root='.data',
def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs):
"""Create iterator objects for splits of the SST dataset.
- Arguments:
+ Args:
batch_size: Batch_size
device: Device to create batches on. Use - 1 for CPU and None for
the currently active GPU device.
diff --git a/torchtext/datasets/text_classification.py b/torchtext/datasets/text_classification.py
index be7400f91b..fc08876a34 100644
--- a/torchtext/datasets/text_classification.py
+++ b/torchtext/datasets/text_classification.py
@@ -78,7 +78,7 @@ class TextClassificationDataset(torch.utils.data.Dataset):
def __init__(self, vocab, data, labels):
"""Initiate text-classification dataset.
- Arguments:
+ Args:
vocab: Vocabulary object used for dataset.
data: a list of label/tokens tuple. tokens are a tensor after
numericalizing the string tokens. label is an integer.
@@ -154,7 +154,7 @@ def AG_NEWS(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -183,7 +183,7 @@ def SogouNews(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -221,7 +221,7 @@ def DBpedia(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -247,7 +247,7 @@ def YelpReviewPolarity(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -272,7 +272,7 @@ def YelpReviewFull(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -306,7 +306,7 @@ def YahooAnswers(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -332,7 +332,7 @@ def AmazonReviewPolarity(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -357,7 +357,7 @@ def AmazonReviewFull(*args, **kwargs):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the dataset are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
diff --git a/torchtext/datasets/translation.py b/torchtext/datasets/translation.py
index 058022999f..cbb7ebdb39 100644
--- a/torchtext/datasets/translation.py
+++ b/torchtext/datasets/translation.py
@@ -17,7 +17,7 @@ def sort_key(ex):
def __init__(self, path, exts, fields, **kwargs):
"""Create a TranslationDataset given paths and fields.
- Arguments:
+ Args:
path: Common prefix of paths to the data files for both languages.
exts: A tuple containing the extension to path for each language.
fields: A tuple containing the fields that will be used for data
@@ -46,7 +46,7 @@ def splits(cls, exts, fields, path=None, root='.data',
train='train', validation='val', test='test', **kwargs):
"""Create dataset objects for splits of a TranslationDataset.
- Arguments:
+ Args:
exts: A tuple containing the extension to path for each language.
fields: A tuple containing the fields that will be used for data
in each language.
@@ -87,7 +87,7 @@ def splits(cls, exts, fields, root='.data',
train='train', validation='val', test='test2016', **kwargs):
"""Create dataset objects for splits of the Multi30k dataset.
- Arguments:
+ Args:
exts: A tuple containing the extension to path for each language.
fields: A tuple containing the fields that will be used for data
in each language.
@@ -127,7 +127,7 @@ def splits(cls, exts, fields, root='.data',
test='IWSLT16.TED.tst2014', **kwargs):
"""Create dataset objects for splits of the IWSLT dataset.
- Arguments:
+ Args:
exts: A tuple containing the extension to path for each language.
fields: A tuple containing the fields that will be used for data
in each language.
@@ -201,7 +201,7 @@ def splits(cls, exts, fields, root='.data',
test='newstest2014.tok.bpe.32000', **kwargs):
"""Create dataset objects for splits of the WMT 2014 dataset.
- Arguments:
+ Args:
exts: A tuple containing the extensions for each language. Must be
either ('.en', '.de') or the reverse.
fields: A tuple containing the fields that will be used for data
diff --git a/torchtext/datasets/trec.py b/torchtext/datasets/trec.py
index d96723c672..6e8792b519 100644
--- a/torchtext/datasets/trec.py
+++ b/torchtext/datasets/trec.py
@@ -18,7 +18,7 @@ def __init__(self, path, text_field, label_field,
fine_grained=False, **kwargs):
"""Create an TREC dataset instance given a path and fields.
- Arguments:
+ Args:
path: Path to the data file.
text_field: The field that will be used for text data.
label_field: The field that will be used for label data.
@@ -46,7 +46,7 @@ def splits(cls, text_field, label_field, root='.data',
train='train_5500.label', test='TREC_10.label', **kwargs):
"""Create dataset objects for splits of the TREC dataset.
- Arguments:
+ Args:
text_field: The field that will be used for the sentence.
label_field: The field that will be used for label data.
root: Root dataset storage directory. Default is '.data'.
@@ -64,7 +64,7 @@ def splits(cls, text_field, label_field, root='.data',
def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs):
"""Create iterator objects for splits of the TREC dataset.
- Arguments:
+ Args:
batch_size: Batch_size
device: Device to create batches on. Use - 1 for CPU and None for
the currently active GPU device.
diff --git a/torchtext/datasets/unsupervised_learning.py b/torchtext/datasets/unsupervised_learning.py
index f1a97459b4..1babf1bfca 100644
--- a/torchtext/datasets/unsupervised_learning.py
+++ b/torchtext/datasets/unsupervised_learning.py
@@ -85,7 +85,7 @@ class EnWik9(torch.utils.data.Dataset):
def __init__(self, begin_line=0, num_lines=6348957, root='.data'):
"""Initiate EnWik9 dataset.
- Arguments:
+ Args:
begin_line: the number of beginning line. Default: 0
num_lines: the number of lines to be loaded. Default: 6348957
root: Directory where the datasets are saved. Default: ".data"
diff --git a/torchtext/experimental/datasets/language_modeling.py b/torchtext/experimental/datasets/language_modeling.py
index 777f04d93c..3350c3f3b0 100644
--- a/torchtext/experimental/datasets/language_modeling.py
+++ b/torchtext/experimental/datasets/language_modeling.py
@@ -30,7 +30,7 @@ class LanguageModelingDataset(torch.utils.data.Dataset):
def __init__(self, data, vocab, transform):
"""Initiate language modeling dataset.
- Arguments:
+ Args:
data: a tensor of tokens. tokens are ids after
numericalizing the string tokens.
torch.tensor([token_id_1, token_id_2, token_id_3, token_id1]).long()
@@ -94,7 +94,7 @@ def WikiText2(tokenizer=None, root='.data', vocab=None, data_select=('train', 'v
Create language modeling dataset: WikiText2
Separately returns the train/test/valid set
- Arguments:
+ Args:
tokenizer: the tokenizer used to preprocess raw text data.
The default one is basic_english tokenizer in fastText. spacy tokenizer
is supported as well (see example below). A custom tokenizer is callable
@@ -128,7 +128,7 @@ def WikiText103(tokenizer=None, root='.data', vocab=None, data_select=('train',
Create language modeling dataset: WikiText103
Separately returns the train/test/valid set
- Arguments:
+ Args:
tokenizer: the tokenizer used to preprocess raw text data.
The default one is basic_english tokenizer in fastText. spacy tokenizer
is supported as well (see example below). A custom tokenizer is callable
@@ -163,7 +163,7 @@ def PennTreebank(tokenizer=None, root='.data', vocab=None, data_select=('train',
Create language modeling dataset: PennTreebank
Separately returns the train/test/valid set
- Arguments:
+ Args:
tokenizer: the tokenizer used to preprocess raw text data.
The default one is basic_english tokenizer in fastText. spacy tokenizer
is supported as well (see example below). A custom tokenizer is callable
@@ -198,7 +198,7 @@ def WMTNewsCrawl(tokenizer=None, root='.data', vocab=None, data_select=('train')
Create language modeling dataset: WMTNewsCrawl
returns the train set
- Arguments:
+ Args:
tokenizer: the tokenizer used to preprocess raw text data.
The default one is basic_english tokenizer in fastText. spacy tokenizer
is supported as well (see example below). A custom tokenizer is callable
diff --git a/torchtext/experimental/datasets/question_answer.py b/torchtext/experimental/datasets/question_answer.py
index fb3b390dd8..ec239deb97 100644
--- a/torchtext/experimental/datasets/question_answer.py
+++ b/torchtext/experimental/datasets/question_answer.py
@@ -24,7 +24,7 @@ class QuestionAnswerDataset(torch.utils.data.Dataset):
def __init__(self, data, vocab, transforms):
"""Initiate question answer dataset.
- Arguments:
+ Args:
data: a tuple of (context, question, answers, ans_pos).
vocab: Vocabulary object used for dataset.
transforms: a dictionary of transforms.
@@ -96,7 +96,7 @@ def SQuAD1(root='.data', vocab=None, tokenizer=None, data_select=('train', 'dev'
Separately returns the train and dev dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
vocab: Vocabulary used for dataset. If None, it will generate a new
vocabulary based on the train data set.
@@ -130,7 +130,7 @@ def SQuAD2(root='.data', vocab=None, tokenizer=None, data_select=('train', 'dev'
Separately returns the train and dev dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
vocab: Vocabulary used for dataset. If None, it will generate a new
vocabulary based on the train data set.
diff --git a/torchtext/experimental/datasets/raw/language_modeling.py b/torchtext/experimental/datasets/raw/language_modeling.py
index e369c4ebb3..50f3f82a80 100644
--- a/torchtext/experimental/datasets/raw/language_modeling.py
+++ b/torchtext/experimental/datasets/raw/language_modeling.py
@@ -63,7 +63,7 @@ def WikiText2(root='.data', data_select=('train', 'valid', 'test')):
Create language modeling dataset: WikiText2
Separately returns the train/test/valid set
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tupel for the returned datasets. Default: ('train', 'valid, 'test')
By default, all the three datasets (train, test, valid) are generated. Users
@@ -88,7 +88,7 @@ def WikiText103(root='.data', data_select=('train', 'valid', 'test')):
Create language modeling dataset: WikiText103
Separately returns the train/test/valid set
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: the returned datasets. Default: ('train', 'valid','test')
By default, all the three datasets (train, test, valid) are generated. Users
@@ -111,7 +111,7 @@ def PennTreebank(root='.data', data_select=('train', 'valid', 'test')):
Create language modeling dataset: PennTreebank
Separately returns the train/test/valid set
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets
(Default: ('train', 'test','valid'))
@@ -136,7 +136,7 @@ def WMTNewsCrawl(root='.data', data_select=('train'), year=2010, language='en'):
Create language modeling dataset: WMTNewsCrawl
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets.
(Default: 'train')
diff --git a/torchtext/experimental/datasets/raw/question_answer.py b/torchtext/experimental/datasets/raw/question_answer.py
index 71b63df5d5..d21dbdbc55 100644
--- a/torchtext/experimental/datasets/raw/question_answer.py
+++ b/torchtext/experimental/datasets/raw/question_answer.py
@@ -46,7 +46,7 @@ def SQuAD1(root='.data', data_select=('train', 'dev')):
['Saint Bernadette Soubirous'],
[515])
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets (Default: ('train', 'dev'))
By default, both datasets (train, dev) are generated. Users could also choose any one or two of them,
@@ -70,7 +70,7 @@ def SQuAD2(root='.data', data_select=('train', 'dev')):
['in the late 1990s'],
[269])
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets (Default: ('train', 'dev'))
By default, both datasets (train, dev) are generated. Users could also choose any one or two of them,
diff --git a/torchtext/experimental/datasets/raw/sequence_tagging.py b/torchtext/experimental/datasets/raw/sequence_tagging.py
index b584d147ab..c1a67261f1 100644
--- a/torchtext/experimental/datasets/raw/sequence_tagging.py
+++ b/torchtext/experimental/datasets/raw/sequence_tagging.py
@@ -69,7 +69,7 @@ def UDPOS(root=".data", data_select=('train', 'valid', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets (Default: ('train', 'valid', 'test'))
By default, all the datasets (train, valid, test) are generated.
@@ -88,7 +88,7 @@ def CoNLL2000Chunking(root=".data", data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets (Default: ('train', 'test'))
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
diff --git a/torchtext/experimental/datasets/raw/text_classification.py b/torchtext/experimental/datasets/raw/text_classification.py
index 46ee16b982..694e4d5d29 100644
--- a/torchtext/experimental/datasets/raw/text_classification.py
+++ b/torchtext/experimental/datasets/raw/text_classification.py
@@ -61,7 +61,7 @@ def AG_NEWS(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -81,7 +81,7 @@ def SogouNews(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -101,7 +101,7 @@ def DBpedia(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -121,7 +121,7 @@ def YelpReviewPolarity(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -141,7 +141,7 @@ def YelpReviewFull(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -161,7 +161,7 @@ def YahooAnswers(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -181,7 +181,7 @@ def AmazonReviewPolarity(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -201,7 +201,7 @@ def AmazonReviewFull(root='.data', data_select=('train', 'test')):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
@@ -231,7 +231,7 @@ def IMDB(root='.data', data_select=('train', 'test')):
Separately returns the raw training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
data_select: a string or tuple for the returned datasets. Default: ('train', 'test')
By default, both datasets (train, test) are generated. Users could also choose any one or two of them,
diff --git a/torchtext/experimental/datasets/raw/translation.py b/torchtext/experimental/datasets/raw/translation.py
index c36f9abd87..3b74c58421 100644
--- a/torchtext/experimental/datasets/raw/translation.py
+++ b/torchtext/experimental/datasets/raw/translation.py
@@ -234,7 +234,7 @@ def Multi30k(train_filenames=("train.de", "train.en"),
val.5.de
val.5.en
- Arguments:
+ Args:
train_filenames: the source and target filenames for training.
Default: ('train.de', 'train.en')
valid_filenames: the source and target filenames for valid.
@@ -400,7 +400,7 @@ def IWSLT(train_filenames=('train.de-en.de', 'train.de-en.en'),
train.tags.fr-en.en
train.tags.fr-en.fr
- Arguments:
+ Args:
train_filenames: the source and target filenames for training.
Default: ('train.de-en.de', 'train.de-en.en')
valid_filenames: the source and target filenames for valid.
@@ -486,7 +486,7 @@ def WMT14(train_filenames=('train.tok.clean.bpe.32000.de',
newstest2015.tok.bpe.32000.de
train.tok.clean.bpe.32000.de
- Arguments:
+ Args:
train_filenames: the source and target filenames for training.
Default: ('train.tok.clean.bpe.32000.de', 'train.tok.clean.bpe.32000.en')
valid_filenames: the source and target filenames for valid.
diff --git a/torchtext/experimental/datasets/sequence_tagging.py b/torchtext/experimental/datasets/sequence_tagging.py
index 3c0448e292..26b52ab206 100644
--- a/torchtext/experimental/datasets/sequence_tagging.py
+++ b/torchtext/experimental/datasets/sequence_tagging.py
@@ -74,7 +74,7 @@ class SequenceTaggingDataset(torch.utils.data.Dataset):
def __init__(self, data, vocabs, transforms):
"""Initiate sequence tagging dataset.
- Arguments:
+ Args:
data: a list of word and its respective tags. Example:
[[word, POS, dep_parsing label, ...]]
vocabs: a list of vocabularies for its respective tags.
@@ -113,7 +113,7 @@ def UDPOS(root=".data", vocabs=None, data_select=("train", "valid", "test")):
Separately returns the training, validation, and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
vocabs: A list of voabularies for each columns in the dataset. Must be in an
instance of List
@@ -139,7 +139,7 @@ def CoNLL2000Chunking(root=".data", vocabs=None, data_select=("train", "test")):
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
vocabs: A list of voabularies for each columns in the dataset. Must be in an
instance of List
diff --git a/torchtext/experimental/datasets/text_classification.py b/torchtext/experimental/datasets/text_classification.py
index 1ba9819f9b..0646e8d63a 100644
--- a/torchtext/experimental/datasets/text_classification.py
+++ b/torchtext/experimental/datasets/text_classification.py
@@ -38,7 +38,7 @@ class TextClassificationDataset(torch.utils.data.Dataset):
def __init__(self, data, vocab, transforms):
"""Initiate text-classification dataset.
- Arguments:
+ Args:
data: a list of label and text tring tuple. label is an integer.
[(label1, text1), (label2, text2), (label2, text3)]
vocab: Vocabulary object used for dataset.
@@ -113,7 +113,7 @@ def AG_NEWS(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('tr
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -157,7 +157,7 @@ def SogouNews(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -210,7 +210,7 @@ def DBpedia(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('tr
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -251,7 +251,7 @@ def YelpReviewPolarity(root='.data', ngrams=1, vocab=None, tokenizer=None, data_
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -291,7 +291,7 @@ def YelpReviewFull(root='.data', ngrams=1, vocab=None, tokenizer=None, data_sele
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -340,7 +340,7 @@ def YahooAnswers(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -381,7 +381,7 @@ def AmazonReviewPolarity(root='.data', ngrams=1, vocab=None, tokenizer=None, dat
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -421,7 +421,7 @@ def AmazonReviewFull(root='.data', ngrams=1, vocab=None, tokenizer=None, data_se
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
@@ -462,7 +462,7 @@ def IMDB(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('train
Separately returns the training and test dataset
- Arguments:
+ Args:
root: Directory where the datasets are saved. Default: ".data"
ngrams: a contiguous sequence of n items from s string text.
Default: 1
diff --git a/torchtext/experimental/datasets/translation.py b/torchtext/experimental/datasets/translation.py
index abd626d2ec..c38d17401d 100644
--- a/torchtext/experimental/datasets/translation.py
+++ b/torchtext/experimental/datasets/translation.py
@@ -95,7 +95,7 @@ class TranslationDataset(torch.utils.data.Dataset):
def __init__(self, data, vocab, transforms):
"""Initiate translation dataset.
- Arguments:
+ Args:
data: a tuple of source and target tensors, which include token ids
numericalizing the string tokens.
[(src_tensor0, tgt_tensor0), (src_tensor1, tgt_tensor1)]
@@ -143,7 +143,7 @@ def Multi30k(train_filenames=("train.de", "train.en"),
""" Define translation datasets: Multi30k
Separately returns train/valid/test datasets as a tuple
- Arguments:
+ Args:
train_filenames: the source and target filenames for training.
Default: ('train.de', 'train.en')
valid_filenames: the source and target filenames for valid.
@@ -245,7 +245,7 @@ def IWSLT(train_filenames=('train.de-en.de', 'train.de-en.en'),
Separately returns train/valid/test datasets
The available datasets include:
- Arguments:
+ Args:
train_filenames: the source and target filenames for training.
Default: ('train.de-en.de', 'train.de-en.en')
valid_filenames: the source and target filenames for valid.
@@ -486,7 +486,7 @@ def WMT14(train_filenames=('train.tok.clean.bpe.32000.de',
newstest2015.tok.bpe.32000.de
train.tok.clean.bpe.32000.de
- Arguments:
+ Args:
train_filenames: the source and target filenames for training.
Default: ('train.tok.clean.bpe.32000.de', 'train.tok.clean.bpe.32000.en')
valid_filenames: the source and target filenames for valid.
diff --git a/torchtext/experimental/transforms.py b/torchtext/experimental/transforms.py
index 6c3896aa61..1f62ea7032 100644
--- a/torchtext/experimental/transforms.py
+++ b/torchtext/experimental/transforms.py
@@ -209,7 +209,7 @@ def to_ivalue(self):
def load_sp_model(sp_model):
r"""Load a sentencepiece model for file.
- Arguments:
+ Args:
sp_model: the file path or a file object saving the sentencepiece model.
Outputs:
diff --git a/torchtext/experimental/vectors.py b/torchtext/experimental/vectors.py
index a606e12bcc..72bae2351b 100644
--- a/torchtext/experimental/vectors.py
+++ b/torchtext/experimental/vectors.py
@@ -185,7 +185,7 @@ def load_vectors_from_file_path(filepath, delimiter=",", unk_tensor=None, num_cp
def build_vectors(tokens, vectors, unk_tensor=None):
r"""Factory method for creating a vectors object which maps tokens to vectors.
- Arguments:
+ Args:
tokens (List[str]): a list of tokens.
vectors (torch.Tensor): a 2d tensor representing the vector associated with each token.
unk_tensor (torch.Tensor): a 1d tensors representing the vector associated with an unknown token.
diff --git a/torchtext/experimental/vocab.py b/torchtext/experimental/vocab.py
index a7707003ee..6883326938 100644
--- a/torchtext/experimental/vocab.py
+++ b/torchtext/experimental/vocab.py
@@ -85,7 +85,7 @@ def build_vocab_from_iterator(iterator, min_freq=1, unk_token='<unk>'):
"""
Build a Vocab from an iterator.
- Arguments:
+ Args:
iterator: Iterator used to build Vocab. Must yield list or iterator of tokens.
min_freq: The minimum frequency needed to include a token in the vocabulary.
Values less than 1 will be set to 1. Default: 1.
@@ -108,7 +108,7 @@ def vocab(ordered_dict, min_freq=1, unk_token='<unk>'):
Therefore if sorting by token frequency is important to the user, the `ordered_dict` should be created in a way to reflect this.
Additionally, the if the `unk_token` isn't found inside of the `ordered_dict`, it will be added to the end of the vocab.
- Arguments:
+ Args:
ordered_dict (collections.OrderedDict): object holding the frequencies of each token found in the data.
min_freq: The minimum frequency needed to include a token in the vocabulary.
Values less than 1 will be set to 1. Default: 1.
@@ -147,7 +147,7 @@ class Vocab(nn.Module):
__jit_unused_properties__ = ["is_jitable"]
r"""Creates a vocab object which maps tokens to indices.
- Arguments:
+ Args:
vocab (torch.classes.torchtext.Vocab or torchtext._torchtext.Vocab): a cpp vocab object.
"""
diff --git a/torchtext/utils.py b/torchtext/utils.py
index 1e4974df21..d5a749487f 100644
--- a/torchtext/utils.py
+++ b/torchtext/utils.py
@@ -38,7 +38,7 @@ def download_from_url(url, path=None, root='.data', overwrite=False, hash_value=
"""Download file, with logic (from tensor2tensor) for Google Drive. Returns
the path to the downloaded file.
- Arguments:
+ Args:
url: the url of the file from URL header. (None)
root: download folder used to store the file in (.data)
overwrite: overwrite existing files (False)
@@ -136,7 +136,7 @@ def unicode_csv_reader(unicode_csv_data, **kwargs):
Borrowed and slightly modified from the Python docs:
https://docs.python.org/2/library/csv.html#csv-examples
- Arguments:
+ Args:
unicode_csv_data: unicode csv data (see example below)
Examples:
@@ -171,7 +171,7 @@ def utf_8_encoder(unicode_csv_data):
def extract_archive(from_path, to_path=None, overwrite=False):
"""Extract archive.
- Arguments:
+ Args:
from_path: the path of the archive.
to_path: the root path of the extracted files (directory of from_path)
overwrite: overwrite existing files (False)
diff --git a/torchtext/vocab.py b/torchtext/vocab.py
index fe5101d16d..516f158a16 100755
--- a/torchtext/vocab.py
+++ b/torchtext/vocab.py
@@ -35,7 +35,7 @@ def __init__(self, counter, max_size=None, min_freq=1, specials=('<unk>', '<pad>'),
vectors=None, unk_init=None, vectors_cache=None, specials_first=True):
"""Create a Vocab object from a collections.Counter.
- Arguments:
+ Args:
counter: collections.Counter object holding the frequencies of
each value found in the data.
max_size: The maximum size of the vocabulary, or None for no
@@ -149,7 +149,7 @@ def extend(self, v, sort=False):
def load_vectors(self, vectors, **kwargs):
"""
- Arguments:
+ Args:
vectors: one of or a list containing instantiations of the
GloVe, CharNGram, or Vectors classes. Alternatively, one
of or a list of available pretrained vectors:
@@ -201,7 +201,7 @@ def set_vectors(self, stoi, vectors, dim, unk_init=torch.Tensor.zero_):
"""
Set the vectors for the Vocab instance from a collection of Tensors.
- Arguments:
+ Args:
stoi: A dictionary of string to the index of the associated vector
in the `vectors` input argument.
vectors: An indexed iterable (or other structure supporting __getitem__) that
@@ -228,7 +228,7 @@ def __init__(self, counter, max_size=None, specials=('<pad>'),
vectors=None, unk_init=torch.Tensor.zero_):
"""Create a revtok subword vocabulary from a collections.Counter.
- Arguments:
+ Args:
counter: collections.Counter object holding the frequencies of
each word found in the data.
max_size: The maximum size of the subword vocabulary, or None for no
@@ -301,7 +301,7 @@ class Vectors(object):
def __init__(self, name, cache=None,
url=None, unk_init=None, max_vectors=None):
"""
- Arguments:
+ Args:
name: name of the file that contains the vectors
cache: directory for cached vectors
@@ -440,7 +440,7 @@ def __len__(self):
def get_vecs_by_tokens(self, tokens, lower_case_backup=False):
"""Look up embedding vectors of tokens.
- Arguments:
+ Args:
tokens: a token or a list of tokens. if `tokens` is a string,
returns a 1-D tensor of shape `self.dim`; if `tokens` is a
list of strings, returns a 2-D tensor of shape=(len(tokens),
@@ -549,7 +549,7 @@ def build_vocab_from_iterator(iterator, num_lines=None):
"""
Build a Vocab from an iterator.
- Arguments:
+ Args:
iterator: Iterator used to build Vocab. Must yield list or iterator of tokens.
num_lines: The expected number of elements returned by the iterator.
(Default: None)