From 1ff4043c2fd40e90e71df738e585a5e81b687e7f Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Tue, 22 Dec 2020 23:31:45 +1100 Subject: [PATCH] [*.py] Rename "Arguments:" to "Args:" --- .../text_classification/iterable_train.py | 8 ++++---- examples/text_classification/model.py | 2 +- examples/text_classification/predict.py | 2 +- examples/text_classification/train.py | 4 ++-- torchtext/data/dataset.py | 14 ++++++------- torchtext/data/field.py | 16 +++++++-------- torchtext/data/functional.py | 10 +++++----- torchtext/data/iterator.py | 2 +- torchtext/data/metrics.py | 4 ++-- torchtext/data/pipeline.py | 10 +++++----- torchtext/data/utils.py | 4 ++-- torchtext/datasets/imdb.py | 6 +++--- torchtext/datasets/language_modeling.py | 14 ++++++------- torchtext/datasets/nli.py | 4 ++-- torchtext/datasets/sst.py | 6 +++--- torchtext/datasets/text_classification.py | 18 ++++++++--------- torchtext/datasets/translation.py | 10 +++++----- torchtext/datasets/trec.py | 6 +++--- torchtext/datasets/unsupervised_learning.py | 2 +- .../datasets/language_modeling.py | 10 +++++----- .../experimental/datasets/question_answer.py | 6 +++--- .../datasets/raw/language_modeling.py | 8 ++++---- .../datasets/raw/question_answer.py | 4 ++-- .../datasets/raw/sequence_tagging.py | 4 ++-- .../datasets/raw/text_classification.py | 18 ++++++++--------- .../experimental/datasets/raw/translation.py | 6 +++--- .../experimental/datasets/sequence_tagging.py | 6 +++--- .../datasets/text_classification.py | 20 +++++++++---------- .../experimental/datasets/translation.py | 8 ++++---- torchtext/experimental/transforms.py | 2 +- torchtext/experimental/vectors.py | 2 +- torchtext/experimental/vocab.py | 6 +++--- torchtext/utils.py | 6 +++--- torchtext/vocab.py | 14 ++++++------- 34 files changed, 131 insertions(+), 131 deletions(-) diff --git a/examples/text_classification/iterable_train.py b/examples/text_classification/iterable_train.py index d4e6507bc5..94b3c1c3a3 100644 --- a/examples/text_classification/iterable_train.py +++ b/examples/text_classification/iterable_train.py @@ -60,7 +60,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_): r""" Here we use SGD optimizer to train the model. - Arguments: + Args: lr_: learning rate num_epoch: the number of epoches for training the model train_data_: the data used to train the model @@ -108,7 +108,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_): def test(data_): r""" - Arguments: + Args: data_: the data used to train the model """ data = DataLoader( @@ -137,7 +137,7 @@ def get_csv_iterator(data_path, ngrams, vocab, start=0, num_lines=None): Generate an iterator to read CSV file. The yield values are an integer for the label and a tensor for the text part. - Arguments: + Args: data_path: a path for the data file. ngrams: the number used for ngrams. vocab: a vocab object saving the string-to-index information @@ -171,7 +171,7 @@ class Dataset(torch.utils.data.IterableDataset): An iterable dataset to save the data. This dataset supports multi-processing to load the data. - Arguments: + Args: iterator: the iterator to read data. num_lines: the number of lines read by the individual iterator. 
""" diff --git a/examples/text_classification/model.py b/examples/text_classification/model.py index e96d1d0125..1314b1b55a 100644 --- a/examples/text_classification/model.py +++ b/examples/text_classification/model.py @@ -31,7 +31,7 @@ def init_weights(self): def forward(self, text, offsets): r""" - Arguments: + Args: text: 1-D tensor representing a bag of text tensors offsets: a list of offsets to delimit the 1-D text tensor into the individual sequences. diff --git a/examples/text_classification/predict.py b/examples/text_classification/predict.py index b11de131a9..4bdd1f7fce 100644 --- a/examples/text_classification/predict.py +++ b/examples/text_classification/predict.py @@ -11,7 +11,7 @@ def predict(text, model, dictionary, ngrams): The input text is numericalized with the vocab and then sent to the model for inference. - Arguments: + Args: text: a sample text string model: the trained model dictionary: a vocab object for the information of string-to-index diff --git a/examples/text_classification/train.py b/examples/text_classification/train.py index fbd0831832..84f20fb0d4 100644 --- a/examples/text_classification/train.py +++ b/examples/text_classification/train.py @@ -56,7 +56,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_): We use a SGD optimizer to train the model here and the learning rate decreases linearly with the progress of the training process. - Arguments: + Args: lr_: learning rate sub_train_: the data used to train the model sub_valid_: the data used for validation @@ -94,7 +94,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_): def test(data_): r""" - Arguments: + Args: data_: the data used to train the model """ data = DataLoader(data_, batch_size=batch_size, collate_fn=generate_batch) diff --git a/torchtext/data/dataset.py b/torchtext/data/dataset.py index c9efe9168b..eecfc49d9c 100644 --- a/torchtext/data/dataset.py +++ b/torchtext/data/dataset.py @@ -29,7 +29,7 @@ class Dataset(torch.utils.data.Dataset): def __init__(self, examples, fields, filter_pred=None): """Create a dataset from a list of Examples and Fields. - Arguments: + Args: examples: List of Examples. fields (List(tuple(str, Field))): The Fields to use in this tuple. The string is a field name, and the Field is the associated field. @@ -55,7 +55,7 @@ def splits(cls, path=None, root='.data', train=None, validation=None, test=None, **kwargs): """Create Dataset objects for multiple splits of a dataset. - Arguments: + Args: path (str): Common prefix of the splits' file paths, or None to use the result of cls.download(root). root (str): Root dataset storage directory. Default is '.data'. @@ -87,7 +87,7 @@ def split(self, split_ratio=0.7, stratified=False, strata_field='label', random_state=None): """Create train-test(-valid?) splits from the instance's examples. - Arguments: + Args: split_ratio (float or List of floats): a number [0, 1] denoting the amount of data to be used for the training split (rest is used for test), or a list of numbers denoting the relative sizes of train, test and valid @@ -157,7 +157,7 @@ def __getattr__(self, attr): def download(cls, root, check=None): """Download and unzip an online archive (.zip, .gz, or .tgz). - Arguments: + Args: root (str): Folder to download data to. check (str or None): Folder whose existence indicates that the dataset has already been downloaded, or @@ -201,7 +201,7 @@ def download(cls, root, check=None): def filter_examples(self, field_names): """Remove unknown words from dataset examples with respect to given field. 
- Arguments: + Args: field_names (list(str)): Within example only the parts with field names in field_names will have their unknown words deleted. """ @@ -221,7 +221,7 @@ def __init__(self, path, format, fields, skip_header=False, csv_reader_params={}, **kwargs): """Create a TabularDataset given a path, file format, and field list. - Arguments: + Args: path (str): Path to the data file. format (str): The format of the data file. One of "CSV", "TSV", or "JSON" (case-insensitive). @@ -325,7 +325,7 @@ def stratify(examples, strata_field): def rationed_split(examples, train_ratio, test_ratio, val_ratio, rnd): """Create a random permutation of examples, then split them by ratios - Arguments: + Args: examples: a list of data train_ratio, test_ratio, val_ratio: split fractions. rnd: a random shuffler diff --git a/torchtext/data/field.py b/torchtext/data/field.py index e117b2edb7..95be1b85f2 100644 --- a/torchtext/data/field.py +++ b/torchtext/data/field.py @@ -274,7 +274,7 @@ def pad(self, minibatch): def build_vocab(self, *args, **kwargs): """Construct the Vocab object for this field from one or more datasets. - Arguments: + Args: Positional arguments: Dataset objects or other iterable data sources from which to construct the Vocab object that represents the set of possible values for this field. If @@ -311,7 +311,7 @@ def numericalize(self, arr, device=None): If the field has include_lengths=True, a tensor of lengths will be included in the return value. - Arguments: + Args: arr (List[List[str]], or tuple of (List[List[str]], List[int])): List of tokenized and padded examples, or tuple of List of tokenized and padded examples and List of lengths of each @@ -423,7 +423,7 @@ def __init__(self, **kwargs): def segment(self, *args): """Segment one or more datasets with this subword field. - Arguments: + Args: Positional arguments: Dataset objects or other indexable mutable sequences to segment. If a Dataset object is provided, all columns corresponding to this field are used; individual @@ -455,7 +455,7 @@ class NestedField(Field): primarily used to implement character embeddings. See ``tests/data/test_field.py`` for examples on how to use this field. - Arguments: + Args: nesting_field (Field): A field contained in this nested field. use_vocab (bool): Whether to use a Vocab object. If False, the data in this field should already be numerical. Default: ``True``. @@ -533,7 +533,7 @@ def preprocess(self, xs): the list is preprocessed using ``self.nesting_field.preprocess`` and the resulting list is returned. - Arguments: + Args: xs (list or str): The input to preprocess. Returns: @@ -576,7 +576,7 @@ def pad(self, minibatch): ['', '', '', '', '', '', ''], ['', '', '', '', '', '', '']]] - Arguments: + Args: minibatch (list): Each element is a list of string if ``self.nesting_field.sequential`` is ``False``, a list of list of string otherwise. @@ -646,7 +646,7 @@ def pad(self, minibatch): def build_vocab(self, *args, **kwargs): """Construct the Vocab object for nesting field and combine it with this field's vocab. - Arguments: + Args: Positional arguments: Dataset objects or other iterable data sources from which to construct the Vocab object that represents the set of possible values for the nesting field. If @@ -697,7 +697,7 @@ def numericalize(self, arrs, device=None): Each item in the minibatch will be numericalized independently and the resulting tensors will be stacked at the first dimension. - Arguments: + Args: arr (List[List[str]]): List of tokenized and padded examples. 
device (str or torch.device): A string or instance of `torch.device` specifying which device the Variables are going to be created on. diff --git a/torchtext/data/functional.py b/torchtext/data/functional.py index 6e20c8e667..995025e929 100644 --- a/torchtext/data/functional.py +++ b/torchtext/data/functional.py @@ -21,7 +21,7 @@ def generate_sp_model(filename, vocab_size=20000, model_prefix='m_user'): r"""Train a SentencePiece tokenizer. - Arguments: + Args: filename: the data file for training SentencePiece model. vocab_size: the size of vocabulary (Default: 20,000). model_type: the type of SentencePiece model, including unigram, @@ -42,7 +42,7 @@ def generate_sp_model(filename, vocab_size=20000, def load_sp_model(spm): r"""Load a sentencepiece model for file. - Arguments: + Args: spm: the file path or a file object saving the sentencepiece model. Outputs: @@ -70,7 +70,7 @@ def sentencepiece_numericalizer(sp_model): r"""A sentencepiece model to numericalize a text sentence into a generator over the ids. - Arguments: + Args: sp_model: a SentencePiece model. Outputs: @@ -96,7 +96,7 @@ def sentencepiece_tokenizer(sp_model): r"""A sentencepiece model to tokenize a text sentence into a generator over the tokens. - Arguments: + Args: sp_model: a SentencePiece model. Outputs: @@ -157,7 +157,7 @@ def simple_space_split(iterator): def numericalize_tokens_from_iterator(vocab, iterator, removed_tokens=None): r"""Yield a list of ids from an token iterator with a vocab. - Arguments: + Args: vocab: the vocabulary convert token into id. iterator: the iterator yield a list of tokens. removed_tokens: removed tokens from output dataset (Default: None) diff --git a/torchtext/data/iterator.py b/torchtext/data/iterator.py index 4c1119cb8c..3dfa807138 100644 --- a/torchtext/data/iterator.py +++ b/torchtext/data/iterator.py @@ -85,7 +85,7 @@ def __init__(self, dataset, batch_size, sort_key=None, device=None, def splits(cls, datasets, batch_sizes=None, **kwargs): """Create Iterator objects for multiple splits of a dataset. - Arguments: + Args: datasets: Tuple of Dataset objects corresponding to the splits. The first such object should be the train set. batch_sizes: Tuple of batch sizes to use for the different splits, diff --git a/torchtext/data/metrics.py b/torchtext/data/metrics.py index 63e07cdb21..c5c2983ee4 100644 --- a/torchtext/data/metrics.py +++ b/torchtext/data/metrics.py @@ -7,7 +7,7 @@ def _compute_ngram_counter(tokens, max_n): """ Create a Counter with a count of unique n-grams in the tokens list - Arguments: + Args: tokens: a list of tokens (typically a string split on whitespaces) max_n: the maximum order of n-gram wanted @@ -36,7 +36,7 @@ def bleu_score(candidate_corpus, references_corpus, max_n=4, weights=[0.25] * 4) """Computes the BLEU score between a candidate translation corpus and a references translation corpus. Based on https://www.aclweb.org/anthology/P02-1040.pdf - Arguments: + Args: candidate_corpus: an iterable of candidate translations. Each translation is an iterable of tokens references_corpus: an iterable of iterables of reference translations. Each diff --git a/torchtext/data/pipeline.py b/torchtext/data/pipeline.py index f576fdc720..d72ef5ef4c 100644 --- a/torchtext/data/pipeline.py +++ b/torchtext/data/pipeline.py @@ -12,7 +12,7 @@ class Pipeline(object): def __init__(self, convert_token=None): """Create a pipeline. - Arguments: + Args: convert_token: The function to apply to input sequence data. If None, the identity function is used. 
Default: None """ @@ -28,7 +28,7 @@ def __init__(self, convert_token=None): def __call__(self, x, *args): """Apply the the current Pipeline(s) to an input. - Arguments: + Args: x: The input to process with the Pipeline(s). Positional arguments: Forwarded to the `call` function of the Pipeline(s). @@ -43,7 +43,7 @@ def call(self, x, *args): applying the `convert_token` function to all input elements is returned. - Arguments: + Args: x: The input to apply the convert_token function to. Positional arguments: Forwarded to the `convert_token` function of the current Pipeline. @@ -55,7 +55,7 @@ def call(self, x, *args): def add_before(self, pipeline): """Add a Pipeline to be applied before this processing pipeline. - Arguments: + Args: pipeline: The Pipeline or callable to apply before this Pipeline. """ @@ -67,7 +67,7 @@ def add_before(self, pipeline): def add_after(self, pipeline): """Add a Pipeline to be applied after this processing pipeline. - Arguments: + Args: pipeline: The Pipeline or callable to apply after this Pipeline. """ diff --git a/torchtext/data/utils.py b/torchtext/data/utils.py index 5ecfad1958..045c2646fb 100644 --- a/torchtext/data/utils.py +++ b/torchtext/data/utils.py @@ -76,7 +76,7 @@ def get_tokenizer(tokenizer, language='en'): r""" Generate tokenizer function for a string sentence. - Arguments: + Args: tokenizer: the name of tokenizer function. If None, it returns split() function, which splits the string sentence by space. If basic_english, it returns _basic_english_normalize() function, @@ -205,7 +205,7 @@ def dtype_to_attr(dtype): def ngrams_iterator(token_list, ngrams): """Return an iterator that yields the given tokens and their ngrams. - Arguments: + Args: token_list: A list of tokens ngrams: the number of ngrams. diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py index 38fccb97be..e59ce19ecb 100644 --- a/torchtext/datasets/imdb.py +++ b/torchtext/datasets/imdb.py @@ -18,7 +18,7 @@ def sort_key(ex): def __init__(self, path, text_field, label_field, **kwargs): """Create an IMDB dataset instance given a path and fields. - Arguments: + Args: path: Path to the dataset's highest level directory text_field: The field that will be used for text data. label_field: The field that will be used for label data. @@ -41,7 +41,7 @@ def splits(cls, text_field, label_field, root='.data', train='train', test='test', **kwargs): """Create dataset objects for splits of the IMDB dataset. - Arguments: + Args: text_field: The field that will be used for the sentence. label_field: The field that will be used for label data. root: Root dataset storage directory. Default is '.data'. @@ -58,7 +58,7 @@ def splits(cls, text_field, label_field, root='.data', def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs): """Create iterator objects for splits of the IMDB dataset. - Arguments: + Args: batch_size: Batch_size device: Device to create batches on. Use - 1 for CPU and None for the currently active GPU device. diff --git a/torchtext/datasets/language_modeling.py b/torchtext/datasets/language_modeling.py index 0002aabc04..7ebcca71b1 100644 --- a/torchtext/datasets/language_modeling.py +++ b/torchtext/datasets/language_modeling.py @@ -9,7 +9,7 @@ def __init__(self, path, text_field, newline_eos=True, encoding='utf-8', **kwargs): """Create a LanguageModelingDataset given a path and a field. - Arguments: + Args: path: Path to the data file. text_field: The field that will be used for text data. 
newline_eos: Whether to add an token for every newline in the @@ -44,7 +44,7 @@ def splits(cls, text_field, root='.data', train='wiki.train.tokens', This is the most flexible way to use the dataset. - Arguments: + Args: text_field: The field that will be used for text data. root: The root directory that the dataset's zip archive will be expanded into; therefore the directory in whose wikitext-2 @@ -67,7 +67,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. bptt_len: Length of sequences for backpropagation through time. device: Device to create batches on. Use -1 for CPU and None for @@ -105,7 +105,7 @@ def splits(cls, text_field, root='.data', train='wiki.train.tokens', This is the most flexible way to use the dataset. - Arguments: + Args: text_field: The field that will be used for text data. root: The root directory that the dataset's zip archive will be expanded into; therefore the directory in whose wikitext-103 @@ -128,7 +128,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. bptt_len: Length of sequences for backpropagation through time. device: Device to create batches on. Use -1 for CPU and None for @@ -174,7 +174,7 @@ def splits(cls, text_field, root='.data', train='ptb.train.txt', **kwargs): """Create dataset objects for splits of the Penn Treebank dataset. - Arguments: + Args: text_field: The field that will be used for text data. root: The root directory where the data files will be stored. train: The filename of the train data. Default: 'ptb.train.txt'. @@ -195,7 +195,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. bptt_len: Length of sequences for backpropagation through time. device: Device to create batches on. Use -1 for CPU and None for diff --git a/torchtext/datasets/nli.py b/torchtext/datasets/nli.py index 9b3d758b06..758576e7cc 100644 --- a/torchtext/datasets/nli.py +++ b/torchtext/datasets/nli.py @@ -51,7 +51,7 @@ def splits(cls, text_field, label_field, parse_field=None, This is the most flexible way to use the dataset. - Arguments: + Args: text_field: The field that will be used for premise and hypothesis data. label_field: The field that will be used for label data. @@ -96,7 +96,7 @@ def iters(cls, batch_size=32, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. device: Device to create batches on. Use -1 for CPU and None for the currently active GPU device. diff --git a/torchtext/datasets/sst.py b/torchtext/datasets/sst.py index 95a04e3f6b..8c793fad93 100644 --- a/torchtext/datasets/sst.py +++ b/torchtext/datasets/sst.py @@ -17,7 +17,7 @@ def __init__(self, path, text_field, label_field, subtrees=False, fine_grained=False, **kwargs): """Create an SST dataset instance given a path and fields. - Arguments: + Args: path: Path to the data file text_field: The field that will be used for text data. label_field: The field that will be used for label data. 
@@ -49,7 +49,7 @@ def splits(cls, text_field, label_field, root='.data', train_subtrees=False, **kwargs): """Create dataset objects for splits of the SST dataset. - Arguments: + Args: text_field: The field that will be used for the sentence. label_field: The field that will be used for label data. root: The root directory that the dataset's zip archive will be @@ -81,7 +81,7 @@ def splits(cls, text_field, label_field, root='.data', def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs): """Create iterator objects for splits of the SST dataset. - Arguments: + Args: batch_size: Batch_size device: Device to create batches on. Use - 1 for CPU and None for the currently active GPU device. diff --git a/torchtext/datasets/text_classification.py b/torchtext/datasets/text_classification.py index be7400f91b..fc08876a34 100644 --- a/torchtext/datasets/text_classification.py +++ b/torchtext/datasets/text_classification.py @@ -78,7 +78,7 @@ class TextClassificationDataset(torch.utils.data.Dataset): def __init__(self, vocab, data, labels): """Initiate text-classification dataset. - Arguments: + Args: vocab: Vocabulary object used for dataset. data: a list of label/tokens tuple. tokens are a tensor after numericalizing the string tokens. label is an integer. @@ -154,7 +154,7 @@ def AG_NEWS(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -183,7 +183,7 @@ def SogouNews(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -221,7 +221,7 @@ def DBpedia(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -247,7 +247,7 @@ def YelpReviewPolarity(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -272,7 +272,7 @@ def YelpReviewFull(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -306,7 +306,7 @@ def YahooAnswers(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -332,7 +332,7 @@ def AmazonReviewPolarity(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -357,7 +357,7 @@ def AmazonReviewFull(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the dataset are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. 
Default: 1 diff --git a/torchtext/datasets/translation.py b/torchtext/datasets/translation.py index 058022999f..cbb7ebdb39 100644 --- a/torchtext/datasets/translation.py +++ b/torchtext/datasets/translation.py @@ -17,7 +17,7 @@ def sort_key(ex): def __init__(self, path, exts, fields, **kwargs): """Create a TranslationDataset given paths and fields. - Arguments: + Args: path: Common prefix of paths to the data files for both languages. exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data @@ -46,7 +46,7 @@ def splits(cls, exts, fields, path=None, root='.data', train='train', validation='val', test='test', **kwargs): """Create dataset objects for splits of a TranslationDataset. - Arguments: + Args: exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data in each language. @@ -87,7 +87,7 @@ def splits(cls, exts, fields, root='.data', train='train', validation='val', test='test2016', **kwargs): """Create dataset objects for splits of the Multi30k dataset. - Arguments: + Args: exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data in each language. @@ -127,7 +127,7 @@ def splits(cls, exts, fields, root='.data', test='IWSLT16.TED.tst2014', **kwargs): """Create dataset objects for splits of the IWSLT dataset. - Arguments: + Args: exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data in each language. @@ -201,7 +201,7 @@ def splits(cls, exts, fields, root='.data', test='newstest2014.tok.bpe.32000', **kwargs): """Create dataset objects for splits of the WMT 2014 dataset. - Arguments: + Args: exts: A tuple containing the extensions for each language. Must be either ('.en', '.de') or the reverse. fields: A tuple containing the fields that will be used for data diff --git a/torchtext/datasets/trec.py b/torchtext/datasets/trec.py index d96723c672..6e8792b519 100644 --- a/torchtext/datasets/trec.py +++ b/torchtext/datasets/trec.py @@ -18,7 +18,7 @@ def __init__(self, path, text_field, label_field, fine_grained=False, **kwargs): """Create an TREC dataset instance given a path and fields. - Arguments: + Args: path: Path to the data file. text_field: The field that will be used for text data. label_field: The field that will be used for label data. @@ -46,7 +46,7 @@ def splits(cls, text_field, label_field, root='.data', train='train_5500.label', test='TREC_10.label', **kwargs): """Create dataset objects for splits of the TREC dataset. - Arguments: + Args: text_field: The field that will be used for the sentence. label_field: The field that will be used for label data. root: Root dataset storage directory. Default is '.data'. @@ -64,7 +64,7 @@ def splits(cls, text_field, label_field, root='.data', def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs): """Create iterator objects for splits of the TREC dataset. - Arguments: + Args: batch_size: Batch_size device: Device to create batches on. Use - 1 for CPU and None for the currently active GPU device. 
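[Editor's note] The hunks above only touch docstring headers, so as a quick sanity check of the documented API, here is a minimal usage sketch of the legacy text-classification loaders from torchtext/datasets/text_classification.py. The root/ngrams keywords come from the docstrings above; the get_vocab()/get_labels() accessors are assumptions based on the 0.8-era TextClassificationDataset and may differ between releases.

    # Usage sketch only: exercises AG_NEWS with the root/ngrams keywords documented
    # above. get_vocab()/get_labels() are assumed accessors of the 0.8-era
    # TextClassificationDataset and are not part of this patch.
    from torchtext.datasets.text_classification import AG_NEWS

    train_dataset, test_dataset = AG_NEWS(root='.data', ngrams=2)

    vocab = train_dataset.get_vocab()        # vocabulary built from the training split
    labels = train_dataset.get_labels()      # set of integer class labels
    print(len(vocab), len(labels))
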
diff --git a/torchtext/datasets/unsupervised_learning.py b/torchtext/datasets/unsupervised_learning.py index f1a97459b4..1babf1bfca 100644 --- a/torchtext/datasets/unsupervised_learning.py +++ b/torchtext/datasets/unsupervised_learning.py @@ -85,7 +85,7 @@ class EnWik9(torch.utils.data.Dataset): def __init__(self, begin_line=0, num_lines=6348957, root='.data'): """Initiate EnWik9 dataset. - Arguments: + Args: begin_line: the number of beginning line. Default: 0 num_lines: the number of lines to be loaded. Default: 6348957 root: Directory where the datasets are saved. Default: ".data" diff --git a/torchtext/experimental/datasets/language_modeling.py b/torchtext/experimental/datasets/language_modeling.py index 777f04d93c..3350c3f3b0 100644 --- a/torchtext/experimental/datasets/language_modeling.py +++ b/torchtext/experimental/datasets/language_modeling.py @@ -30,7 +30,7 @@ class LanguageModelingDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transform): """Initiate language modeling dataset. - Arguments: + Args: data: a tensor of tokens. tokens are ids after numericalizing the string tokens. torch.tensor([token_id_1, token_id_2, token_id_3, token_id1]).long() @@ -94,7 +94,7 @@ def WikiText2(tokenizer=None, root='.data', vocab=None, data_select=('train', 'v Create language modeling dataset: WikiText2 Separately returns the train/test/valid set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable @@ -128,7 +128,7 @@ def WikiText103(tokenizer=None, root='.data', vocab=None, data_select=('train', Create language modeling dataset: WikiText103 Separately returns the train/test/valid set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable @@ -163,7 +163,7 @@ def PennTreebank(tokenizer=None, root='.data', vocab=None, data_select=('train', Create language modeling dataset: PennTreebank Separately returns the train/test/valid set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable @@ -198,7 +198,7 @@ def WMTNewsCrawl(tokenizer=None, root='.data', vocab=None, data_select=('train') Create language modeling dataset: WMTNewsCrawl returns the train set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable diff --git a/torchtext/experimental/datasets/question_answer.py b/torchtext/experimental/datasets/question_answer.py index fb3b390dd8..ec239deb97 100644 --- a/torchtext/experimental/datasets/question_answer.py +++ b/torchtext/experimental/datasets/question_answer.py @@ -24,7 +24,7 @@ class QuestionAnswerDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transforms): """Initiate question answer dataset. - Arguments: + Args: data: a tuple of (context, question, answers, ans_pos). vocab: Vocabulary object used for dataset. transforms: a dictionary of transforms. 
@@ -96,7 +96,7 @@ def SQuAD1(root='.data', vocab=None, tokenizer=None, data_select=('train', 'dev' Separately returns the train and dev dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocab: Vocabulary used for dataset. If None, it will generate a new vocabulary based on the train data set. @@ -130,7 +130,7 @@ def SQuAD2(root='.data', vocab=None, tokenizer=None, data_select=('train', 'dev' Separately returns the train and dev dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocab: Vocabulary used for dataset. If None, it will generate a new vocabulary based on the train data set. diff --git a/torchtext/experimental/datasets/raw/language_modeling.py b/torchtext/experimental/datasets/raw/language_modeling.py index e369c4ebb3..50f3f82a80 100644 --- a/torchtext/experimental/datasets/raw/language_modeling.py +++ b/torchtext/experimental/datasets/raw/language_modeling.py @@ -63,7 +63,7 @@ def WikiText2(root='.data', data_select=('train', 'valid', 'test')): Create language modeling dataset: WikiText2 Separately returns the train/test/valid set - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tupel for the returned datasets. Default: ('train', 'valid, 'test') By default, all the three datasets (train, test, valid) are generated. Users @@ -88,7 +88,7 @@ def WikiText103(root='.data', data_select=('train', 'valid', 'test')): Create language modeling dataset: WikiText103 Separately returns the train/test/valid set - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: the returned datasets. Default: ('train', 'valid','test') By default, all the three datasets (train, test, valid) are generated. Users @@ -111,7 +111,7 @@ def PennTreebank(root='.data', data_select=('train', 'valid', 'test')): Create language modeling dataset: PennTreebank Separately returns the train/test/valid set - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'test','valid')) @@ -136,7 +136,7 @@ def WMTNewsCrawl(root='.data', data_select=('train'), year=2010, language='en'): Create language modeling dataset: WMTNewsCrawl - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. (Default: 'train') diff --git a/torchtext/experimental/datasets/raw/question_answer.py b/torchtext/experimental/datasets/raw/question_answer.py index 71b63df5d5..d21dbdbc55 100644 --- a/torchtext/experimental/datasets/raw/question_answer.py +++ b/torchtext/experimental/datasets/raw/question_answer.py @@ -46,7 +46,7 @@ def SQuAD1(root='.data', data_select=('train', 'dev')): ['Saint Bernadette Soubirous'], [515]) - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'dev')) By default, both datasets (train, dev) are generated. Users could also choose any one or two of them, @@ -70,7 +70,7 @@ def SQuAD2(root='.data', data_select=('train', 'dev')): ['in the late 1990s'], [269]) - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'dev')) By default, both datasets (train, dev) are generated. 
Users could also choose any one or two of them, diff --git a/torchtext/experimental/datasets/raw/sequence_tagging.py b/torchtext/experimental/datasets/raw/sequence_tagging.py index b584d147ab..c1a67261f1 100644 --- a/torchtext/experimental/datasets/raw/sequence_tagging.py +++ b/torchtext/experimental/datasets/raw/sequence_tagging.py @@ -69,7 +69,7 @@ def UDPOS(root=".data", data_select=('train', 'valid', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'valid', 'test')) By default, all the datasets (train, valid, test) are generated. @@ -88,7 +88,7 @@ def CoNLL2000Chunking(root=".data", data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'test')) By default, both datasets (train, test) are generated. Users could also choose any one or two of them, diff --git a/torchtext/experimental/datasets/raw/text_classification.py b/torchtext/experimental/datasets/raw/text_classification.py index 46ee16b982..694e4d5d29 100644 --- a/torchtext/experimental/datasets/raw/text_classification.py +++ b/torchtext/experimental/datasets/raw/text_classification.py @@ -61,7 +61,7 @@ def AG_NEWS(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -81,7 +81,7 @@ def SogouNews(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -101,7 +101,7 @@ def DBpedia(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -121,7 +121,7 @@ def YelpReviewPolarity(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -141,7 +141,7 @@ def YelpReviewFull(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. 
Users could also choose any one or two of them, @@ -161,7 +161,7 @@ def YahooAnswers(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -181,7 +181,7 @@ def AmazonReviewPolarity(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -201,7 +201,7 @@ def AmazonReviewFull(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -231,7 +231,7 @@ def IMDB(root='.data', data_select=('train', 'test')): Separately returns the raw training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, diff --git a/torchtext/experimental/datasets/raw/translation.py b/torchtext/experimental/datasets/raw/translation.py index c36f9abd87..3b74c58421 100644 --- a/torchtext/experimental/datasets/raw/translation.py +++ b/torchtext/experimental/datasets/raw/translation.py @@ -234,7 +234,7 @@ def Multi30k(train_filenames=("train.de", "train.en"), val.5.de val.5.en - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de', 'train.en') valid_filenames: the source and target filenames for valid. @@ -400,7 +400,7 @@ def IWSLT(train_filenames=('train.de-en.de', 'train.de-en.en'), train.tags.fr-en.en train.tags.fr-en.fr - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de-en.de', 'train.de-en.en') valid_filenames: the source and target filenames for valid. @@ -486,7 +486,7 @@ def WMT14(train_filenames=('train.tok.clean.bpe.32000.de', newstest2015.tok.bpe.32000.de train.tok.clean.bpe.32000.de - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.tok.clean.bpe.32000.de', 'train.tok.clean.bpe.32000.en') valid_filenames: the source and target filenames for valid. diff --git a/torchtext/experimental/datasets/sequence_tagging.py b/torchtext/experimental/datasets/sequence_tagging.py index 3c0448e292..26b52ab206 100644 --- a/torchtext/experimental/datasets/sequence_tagging.py +++ b/torchtext/experimental/datasets/sequence_tagging.py @@ -74,7 +74,7 @@ class SequenceTaggingDataset(torch.utils.data.Dataset): def __init__(self, data, vocabs, transforms): """Initiate sequence tagging dataset. - Arguments: + Args: data: a list of word and its respective tags. Example: [[word, POS, dep_parsing label, ...]] vocabs: a list of vocabularies for its respective tags. 
@@ -113,7 +113,7 @@ def UDPOS(root=".data", vocabs=None, data_select=("train", "valid", "test")): Separately returns the training, validation, and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocabs: A list of voabularies for each columns in the dataset. Must be in an instance of List @@ -139,7 +139,7 @@ def CoNLL2000Chunking(root=".data", vocabs=None, data_select=("train", "test")): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocabs: A list of voabularies for each columns in the dataset. Must be in an instance of List diff --git a/torchtext/experimental/datasets/text_classification.py b/torchtext/experimental/datasets/text_classification.py index 1ba9819f9b..0646e8d63a 100644 --- a/torchtext/experimental/datasets/text_classification.py +++ b/torchtext/experimental/datasets/text_classification.py @@ -38,7 +38,7 @@ class TextClassificationDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transforms): """Initiate text-classification dataset. - Arguments: + Args: data: a list of label and text tring tuple. label is an integer. [(label1, text1), (label2, text2), (label2, text3)] vocab: Vocabulary object used for dataset. @@ -113,7 +113,7 @@ def AG_NEWS(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('tr Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -157,7 +157,7 @@ def SogouNews(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=(' Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -210,7 +210,7 @@ def DBpedia(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('tr Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -251,7 +251,7 @@ def YelpReviewPolarity(root='.data', ngrams=1, vocab=None, tokenizer=None, data_ Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -291,7 +291,7 @@ def YelpReviewFull(root='.data', ngrams=1, vocab=None, tokenizer=None, data_sele Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -340,7 +340,7 @@ def YahooAnswers(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -381,7 +381,7 @@ def AmazonReviewPolarity(root='.data', ngrams=1, vocab=None, tokenizer=None, dat Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. 
Default: 1 @@ -421,7 +421,7 @@ def AmazonReviewFull(root='.data', ngrams=1, vocab=None, tokenizer=None, data_se Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -462,7 +462,7 @@ def IMDB(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('train Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 diff --git a/torchtext/experimental/datasets/translation.py b/torchtext/experimental/datasets/translation.py index abd626d2ec..c38d17401d 100644 --- a/torchtext/experimental/datasets/translation.py +++ b/torchtext/experimental/datasets/translation.py @@ -95,7 +95,7 @@ class TranslationDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transforms): """Initiate translation dataset. - Arguments: + Args: data: a tuple of source and target tensors, which include token ids numericalizing the string tokens. [(src_tensor0, tgt_tensor0), (src_tensor1, tgt_tensor1)] @@ -143,7 +143,7 @@ def Multi30k(train_filenames=("train.de", "train.en"), """ Define translation datasets: Multi30k Separately returns train/valid/test datasets as a tuple - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de', 'train.en') valid_filenames: the source and target filenames for valid. @@ -245,7 +245,7 @@ def IWSLT(train_filenames=('train.de-en.de', 'train.de-en.en'), Separately returns train/valid/test datasets The available datasets include: - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de-en.de', 'train.de-en.en') valid_filenames: the source and target filenames for valid. @@ -486,7 +486,7 @@ def WMT14(train_filenames=('train.tok.clean.bpe.32000.de', newstest2015.tok.bpe.32000.de train.tok.clean.bpe.32000.de - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.tok.clean.bpe.32000.de', 'train.tok.clean.bpe.32000.en') valid_filenames: the source and target filenames for valid. diff --git a/torchtext/experimental/transforms.py b/torchtext/experimental/transforms.py index 6c3896aa61..1f62ea7032 100644 --- a/torchtext/experimental/transforms.py +++ b/torchtext/experimental/transforms.py @@ -209,7 +209,7 @@ def to_ivalue(self): def load_sp_model(sp_model): r"""Load a sentencepiece model for file. - Arguments: + Args: sp_model: the file path or a file object saving the sentencepiece model. Outputs: diff --git a/torchtext/experimental/vectors.py b/torchtext/experimental/vectors.py index a606e12bcc..72bae2351b 100644 --- a/torchtext/experimental/vectors.py +++ b/torchtext/experimental/vectors.py @@ -185,7 +185,7 @@ def load_vectors_from_file_path(filepath, delimiter=",", unk_tensor=None, num_cp def build_vectors(tokens, vectors, unk_tensor=None): r"""Factory method for creating a vectors object which maps tokens to vectors. - Arguments: + Args: tokens (List[str]): a list of tokens. vectors (torch.Tensor): a 2d tensor representing the vector associated with each token. unk_tensor (torch.Tensor): a 1d tensors representing the vector associated with an unknown token. 
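[Editor's note] Before moving on to torchtext/experimental/vocab.py, a short hedged sketch of the build_vectors factory whose docstring is edited just above (torchtext/experimental/vectors.py). The argument names follow that docstring; indexing the returned object by token is an assumption and may differ between releases.

    # Sketch of build_vectors as documented above: tokens is a list of strings,
    # vectors a 2-D tensor with one row per token, unk_tensor a 1-D fallback vector.
    # Indexing the returned object by token is an assumption, not part of this patch.
    import torch
    from torchtext.experimental.vectors import build_vectors

    tokens = ['hello', 'world']
    weights = torch.tensor([[1.0, 0.0],
                            [0.0, 1.0]])     # one vector per token
    unk = torch.zeros(2)                     # returned for out-of-vocabulary tokens

    vecs = build_vectors(tokens, weights, unk_tensor=unk)
    print(vecs['hello'])                     # assumed lookup -> tensor([1., 0.])
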
diff --git a/torchtext/experimental/vocab.py b/torchtext/experimental/vocab.py index a7707003ee..6883326938 100644 --- a/torchtext/experimental/vocab.py +++ b/torchtext/experimental/vocab.py @@ -85,7 +85,7 @@ def build_vocab_from_iterator(iterator, min_freq=1, unk_token=''): """ Build a Vocab from an iterator. - Arguments: + Args: iterator: Iterator used to build Vocab. Must yield list or iterator of tokens. min_freq: The minimum frequency needed to include a token in the vocabulary. Values less than 1 will be set to 1. Default: 1. @@ -108,7 +108,7 @@ def vocab(ordered_dict, min_freq=1, unk_token=''): Therefore if sorting by token frequency is important to the user, the `ordered_dict` should be created in a way to reflect this. Additionally, the if the `unk_token` isn't found inside of the `ordered_dict`, it will be added to the end of the vocab. - Arguments: + Args: ordered_dict (collections.OrderedDict): object holding the frequencies of each token found in the data. min_freq: The minimum frequency needed to include a token in the vocabulary. Values less than 1 will be set to 1. Default: 1. @@ -147,7 +147,7 @@ class Vocab(nn.Module): __jit_unused_properties__ = ["is_jitable"] r"""Creates a vocab object which maps tokens to indices. - Arguments: + Args: vocab (torch.classes.torchtext.Vocab or torchtext._torchtext.Vocab): a cpp vocab object. """ diff --git a/torchtext/utils.py b/torchtext/utils.py index 1e4974df21..d5a749487f 100644 --- a/torchtext/utils.py +++ b/torchtext/utils.py @@ -38,7 +38,7 @@ def download_from_url(url, path=None, root='.data', overwrite=False, hash_value= """Download file, with logic (from tensor2tensor) for Google Drive. Returns the path to the downloaded file. - Arguments: + Args: url: the url of the file from URL header. (None) root: download folder used to store the file in (.data) overwrite: overwrite existing files (False) @@ -136,7 +136,7 @@ def unicode_csv_reader(unicode_csv_data, **kwargs): Borrowed and slightly modified from the Python docs: https://docs.python.org/2/library/csv.html#csv-examples - Arguments: + Args: unicode_csv_data: unicode csv data (see example below) Examples: @@ -171,7 +171,7 @@ def utf_8_encoder(unicode_csv_data): def extract_archive(from_path, to_path=None, overwrite=False): """Extract archive. - Arguments: + Args: from_path: the path of the archive. to_path: the root path of the extracted files (directory of from_path) overwrite: overwrite existing files (False) diff --git a/torchtext/vocab.py b/torchtext/vocab.py index fe5101d16d..516f158a16 100755 --- a/torchtext/vocab.py +++ b/torchtext/vocab.py @@ -35,7 +35,7 @@ def __init__(self, counter, max_size=None, min_freq=1, specials=('', ' vectors=None, unk_init=None, vectors_cache=None, specials_first=True): """Create a Vocab object from a collections.Counter. - Arguments: + Args: counter: collections.Counter object holding the frequencies of each value found in the data. max_size: The maximum size of the vocabulary, or None for no @@ -149,7 +149,7 @@ def extend(self, v, sort=False): def load_vectors(self, vectors, **kwargs): """ - Arguments: + Args: vectors: one of or a list containing instantiations of the GloVe, CharNGram, or Vectors classes. Alternatively, one of or a list of available pretrained vectors: @@ -201,7 +201,7 @@ def set_vectors(self, stoi, vectors, dim, unk_init=torch.Tensor.zero_): """ Set the vectors for the Vocab instance from a collection of Tensors. 
- Arguments: + Args: stoi: A dictionary of string to the index of the associated vector in the `vectors` input argument. vectors: An indexed iterable (or other structure supporting __getitem__) that @@ -228,7 +228,7 @@ def __init__(self, counter, max_size=None, specials=(''), vectors=None, unk_init=torch.Tensor.zero_): """Create a revtok subword vocabulary from a collections.Counter. - Arguments: + Args: counter: collections.Counter object holding the frequencies of each word found in the data. max_size: The maximum size of the subword vocabulary, or None for no @@ -301,7 +301,7 @@ class Vectors(object): def __init__(self, name, cache=None, url=None, unk_init=None, max_vectors=None): """ - Arguments: + Args: name: name of the file that contains the vectors cache: directory for cached vectors @@ -440,7 +440,7 @@ def __len__(self): def get_vecs_by_tokens(self, tokens, lower_case_backup=False): """Look up embedding vectors of tokens. - Arguments: + Args: tokens: a token or a list of tokens. if `tokens` is a string, returns a 1-D tensor of shape `self.dim`; if `tokens` is a list of strings, returns a 2-D tensor of shape=(len(tokens), @@ -549,7 +549,7 @@ def build_vocab_from_iterator(iterator, num_lines=None): """ Build a Vocab from an iterator. - Arguments: + Args: iterator: Iterator used to build Vocab. Must yield list or iterator of tokens. num_lines: The expected number of elements returned by the iterator. (Default: None)
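[Editor's note] Taken together, the patch is purely mechanical: every Google-style docstring section header changes from "Arguments:" to "Args:" while the parameter descriptions stay untouched. For reference, a minimal sketch of the resulting docstring shape, mirroring predict() from examples/text_classification/predict.py (the body is elided and the ngrams description is borrowed from a sibling docstring in iterable_train.py):

    def predict(text, model, dictionary, ngrams):
        r"""Numericalize the input text with the vocab and run the trained model.

        Args:
            text: a sample text string
            model: the trained model
            dictionary: a vocab object for the information of string-to-index
            ngrams: the number used for ngrams
        """
        ...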