[*.py] Rename "Arguments:" to "Args:" #1110

Merged
8 changes: 4 additions & 4 deletions examples/text_classification/iterable_train.py
@@ -60,7 +60,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_):
r"""
Here we use an SGD optimizer to train the model.

Arguments:
Args:
lr_: learning rate
num_epoch: the number of epochs for training the model
train_data_: the data used to train the model
@@ -108,7 +108,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_):

def test(data_):
r"""
Arguments:
Args:
data_: the data used to train the model
"""
data = DataLoader(
@@ -137,7 +137,7 @@ def get_csv_iterator(data_path, ngrams, vocab, start=0, num_lines=None):
Generate an iterator to read CSV file.
The yield values are an integer for the label and a tensor for the text part.

Arguments:
Args:
data_path: a path for the data file.
ngrams: the number used for ngrams.
vocab: a vocab object saving the string-to-index information
@@ -171,7 +171,7 @@ class Dataset(torch.utils.data.IterableDataset):
An iterable dataset to save the data. This dataset supports multi-processing
to load the data.

Arguments:
Args:
iterator: the iterator to read data.
num_lines: the number of lines read by the individual iterator.
"""
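For context on the pattern these docstrings describe — a CSV reader that yields (label, text-tensor) pairs, wrapped in an IterableDataset for multi-process loading — here is a minimal sketch. The file path, column layout, and dict-like vocab are illustrative assumptions, not code from this PR.

```python
import csv

import torch
from torch.utils.data import IterableDataset


class CSVTextDataset(IterableDataset):
    """Streams (label, token-id tensor) pairs from a CSV with rows of the form: label,text."""

    def __init__(self, data_path, vocab):
        self.data_path = data_path  # path to the CSV file (assumed layout)
        self.vocab = vocab          # dict-like string-to-index mapping (assumption)

    def __iter__(self):
        with open(self.data_path, encoding="utf-8") as f:
            for label, text in csv.reader(f):
                ids = [self.vocab.get(tok, 0) for tok in text.split()]
                yield int(label), torch.tensor(ids, dtype=torch.long)


# torch.utils.data.DataLoader(CSVTextDataset("train.csv", vocab), batch_size=None)
# then yields one (label, tensor) pair at a time, so no padding or collation is needed.
```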
2 changes: 1 addition & 1 deletion examples/text_classification/model.py
@@ -31,7 +31,7 @@ def init_weights(self):

def forward(self, text, offsets):
r"""
Arguments:
Args:
text: 1-D tensor representing a bag of text tensors
offsets: a list of offsets to delimit the 1-D text tensor
into the individual sequences.
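The text/offsets convention documented in forward() matches PyTorch's nn.EmbeddingBag input format: every sequence in the batch is flattened into a single 1-D tensor, and offsets mark where each sequence starts. A small self-contained illustration (the sizes are arbitrary):

```python
import torch
import torch.nn as nn

embedding = nn.EmbeddingBag(num_embeddings=10, embedding_dim=4)

# Two sequences of token ids, [2, 3, 1] and [4, 4], flattened into one tensor;
# offsets holds the start position of each sequence within `text`.
text = torch.tensor([2, 3, 1, 4, 4])
offsets = torch.tensor([0, 3])

pooled = embedding(text, offsets)
print(pooled.shape)  # torch.Size([2, 4]) -- one mean-pooled vector per sequence
```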
2 changes: 1 addition & 1 deletion examples/text_classification/predict.py
@@ -11,7 +11,7 @@ def predict(text, model, dictionary, ngrams):
The input text is numericalized with the vocab and then sent to
the model for inference.

Arguments:
Args:
text: a sample text string
model: the trained model
dictionary: a vocab object for the information of string-to-index
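A minimal sketch of the numericalize-then-infer flow this docstring describes. The `classify` helper and the dict-like vocab are hypothetical stand-ins, not the actual code in predict.py; the (text, offsets) call matches the model sketch above.

```python
import torch
from torchtext.data.utils import ngrams_iterator


def classify(text, model, vocab, ngrams):
    # Map the tokens and their ngrams to ids, defaulting unknown tokens to index 0.
    tokens = list(ngrams_iterator(text.split(), ngrams))
    ids = torch.tensor([vocab.get(tok, 0) for tok in tokens], dtype=torch.long)
    with torch.no_grad():
        output = model(ids, torch.tensor([0]))  # a single sample, so the only offset is 0
    return output.argmax(1).item()
```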
4 changes: 2 additions & 2 deletions examples/text_classification/train.py
@@ -56,7 +56,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_):
We use an SGD optimizer to train the model here and the learning rate
decreases linearly with the progress of the training process.

Arguments:
Args:
lr_: learning rate
sub_train_: the data used to train the model
sub_valid_: the data used for validation
@@ -94,7 +94,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_):

def test(data_):
r"""
Arguments:
Args:
data_: the data used to train the model
"""
data = DataLoader(data_, batch_size=batch_size, collate_fn=generate_batch)
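The docstring above says the learning rate "decreases linearly with the progress of the training process". One way to express that with a standard PyTorch scheduler is sketched below; the placeholder model, starting lr, and epoch count are assumptions, not values from train.py.

```python
import torch

model = torch.nn.Linear(8, 2)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=4.0)

num_epochs = 5
# Scale the base lr by (1 - epoch / num_epochs): 4.0, 3.2, 2.4, 1.6, 0.8.
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lr_lambda=lambda epoch: 1.0 - epoch / num_epochs)

for epoch in range(num_epochs):
    # ... one pass over sub_train_, then evaluation on sub_valid_ ...
    scheduler.step()
```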
14 changes: 7 additions & 7 deletions torchtext/data/dataset.py
@@ -29,7 +29,7 @@ class Dataset(torch.utils.data.Dataset):
def __init__(self, examples, fields, filter_pred=None):
"""Create a dataset from a list of Examples and Fields.

Arguments:
Args:
examples: List of Examples.
fields (List(tuple(str, Field))): The Fields to use in this tuple. The
string is a field name, and the Field is the associated field.
@@ -55,7 +55,7 @@ def splits(cls, path=None, root='.data', train=None, validation=None,
test=None, **kwargs):
"""Create Dataset objects for multiple splits of a dataset.

Arguments:
Args:
path (str): Common prefix of the splits' file paths, or None to use
the result of cls.download(root).
root (str): Root dataset storage directory. Default is '.data'.
@@ -87,7 +87,7 @@ def split(self, split_ratio=0.7, stratified=False, strata_field='label',
random_state=None):
"""Create train-test(-valid?) splits from the instance's examples.

Arguments:
Args:
split_ratio (float or List of floats): a number [0, 1] denoting the amount
of data to be used for the training split (rest is used for test),
or a list of numbers denoting the relative sizes of train, test and valid
@@ -157,7 +157,7 @@ def __getattr__(self, attr):
def download(cls, root, check=None):
"""Download and unzip an online archive (.zip, .gz, or .tgz).

Arguments:
Args:
root (str): Folder to download data to.
check (str or None): Folder whose existence indicates
that the dataset has already been downloaded, or
@@ -201,7 +201,7 @@ def download(cls, root, check=None):
def filter_examples(self, field_names):
"""Remove unknown words from dataset examples with respect to given field.

Arguments:
Args:
field_names (list(str)): Within example only the parts with field names in
field_names will have their unknown words deleted.
"""
@@ -221,7 +221,7 @@ def __init__(self, path, format, fields, skip_header=False,
csv_reader_params={}, **kwargs):
"""Create a TabularDataset given a path, file format, and field list.

Arguments:
Args:
path (str): Path to the data file.
format (str): The format of the data file. One of "CSV", "TSV", or
"JSON" (case-insensitive).
@@ -325,7 +325,7 @@ def stratify(examples, strata_field):
def rationed_split(examples, train_ratio, test_ratio, val_ratio, rnd):
"""Create a random permutation of examples, then split them by ratios

Arguments:
Args:
examples: a list of data
train_ratio, test_ratio, val_ratio: split fractions.
rnd: a random shuffler
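A minimal sketch of the idea behind rationed_split: build one random permutation of the examples, then cut it at the train/test/validation boundaries. The function name and the plain-list inputs are illustrative, not the module's own code.

```python
import random


def split_by_ratio(examples, train_ratio, test_ratio, val_ratio, seed=0):
    indices = list(range(len(examples)))
    random.Random(seed).shuffle(indices)       # one random permutation
    n_train = round(train_ratio * len(examples))
    n_test = round(test_ratio * len(examples))
    train = [examples[i] for i in indices[:n_train]]
    test = [examples[i] for i in indices[n_train:n_train + n_test]]
    valid = [examples[i] for i in indices[n_train + n_test:]]  # remainder goes to validation
    return train, test, valid


print(split_by_ratio(list("abcdefghij"), 0.7, 0.2, 0.1))
```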
16 changes: 8 additions & 8 deletions torchtext/data/field.py
@@ -274,7 +274,7 @@ def pad(self, minibatch):
def build_vocab(self, *args, **kwargs):
"""Construct the Vocab object for this field from one or more datasets.

Arguments:
Args:
Positional arguments: Dataset objects or other iterable data
sources from which to construct the Vocab object that
represents the set of possible values for this field. If
@@ -311,7 +311,7 @@ def numericalize(self, arr, device=None):
If the field has include_lengths=True, a tensor of lengths will be
included in the return value.

Arguments:
Args:
arr (List[List[str]], or tuple of (List[List[str]], List[int])):
List of tokenized and padded examples, or tuple of List of
tokenized and padded examples and List of lengths of each
@@ -423,7 +423,7 @@ def __init__(self, **kwargs):
def segment(self, *args):
"""Segment one or more datasets with this subword field.

Arguments:
Args:
Positional arguments: Dataset objects or other indexable
mutable sequences to segment. If a Dataset object is provided,
all columns corresponding to this field are used; individual
@@ -455,7 +455,7 @@ class NestedField(Field):
primarily used to implement character embeddings. See ``tests/data/test_field.py``
for examples on how to use this field.

Arguments:
Args:
nesting_field (Field): A field contained in this nested field.
use_vocab (bool): Whether to use a Vocab object. If False, the data in this
field should already be numerical. Default: ``True``.
@@ -533,7 +533,7 @@ def preprocess(self, xs):
the list is preprocessed using ``self.nesting_field.preprocess`` and the resulting
list is returned.

Arguments:
Args:
xs (list or str): The input to preprocess.

Returns:
@@ -576,7 +576,7 @@ def pad(self, minibatch):
['<w>', '</s>', '</w>', '<c>', '<c>', '<c>', '<c>'],
['<c>', '<c>', '<c>', '<c>', '<c>', '<c>', '<c>']]]

Arguments:
Args:
minibatch (list): Each element is a list of string if
``self.nesting_field.sequential`` is ``False``, a list of list of string
otherwise.
@@ -646,7 +646,7 @@ def build_vocab(self, *args, **kwargs):
def build_vocab(self, *args, **kwargs):
"""Construct the Vocab object for nesting field and combine it with this field's vocab.

Arguments:
Args:
Positional arguments: Dataset objects or other iterable data
sources from which to construct the Vocab object that
represents the set of possible values for the nesting field. If
@@ -697,7 +697,7 @@ def numericalize(self, arrs, device=None):
Each item in the minibatch will be numericalized independently and the resulting
tensors will be stacked at the first dimension.

Arguments:
Args:
arr (List[List[str]]): List of tokenized and padded examples.
device (str or torch.device): A string or instance of `torch.device`
specifying which device the Variables are going to be created on.
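A minimal sketch of the Field workflow these docstrings describe, written against the legacy torchtext.data API this file belongs to: build a vocab from an iterable source, pad a minibatch, then numericalize it. The field options and the toy batch are assumptions.

```python
from torchtext.data import Field

TEXT = Field(sequential=True, lower=True)
minibatch = [["the", "cat", "sat"], ["a", "dog"]]

TEXT.build_vocab(minibatch)          # any iterable data source works, per build_vocab
padded = TEXT.pad(minibatch)         # pads the shorter example with '<pad>'
tensor = TEXT.numericalize(padded)   # LongTensor, (max_len, batch_size) by default
print(tensor.shape)                  # torch.Size([3, 2])
```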
10 changes: 5 additions & 5 deletions torchtext/data/functional.py
@@ -21,7 +21,7 @@ def generate_sp_model(filename, vocab_size=20000,
model_prefix='m_user'):
r"""Train a SentencePiece tokenizer.

Arguments:
Args:
filename: the data file for training SentencePiece model.
vocab_size: the size of vocabulary (Default: 20,000).
model_type: the type of SentencePiece model, including unigram,
@@ -42,7 +42,7 @@ def load_sp_model(spm):
def load_sp_model(spm):
r"""Load a sentencepiece model for file.

Arguments:
Args:
spm: the file path or a file object saving the sentencepiece model.

Outputs:
@@ -70,7 +70,7 @@ def sentencepiece_numericalizer(sp_model):
r"""A sentencepiece model to numericalize a text sentence into
a generator over the ids.

Arguments:
Args:
sp_model: a SentencePiece model.

Outputs:
@@ -96,7 +96,7 @@ def sentencepiece_tokenizer(sp_model):
r"""A sentencepiece model to tokenize a text sentence into
a generator over the tokens.

Arguments:
Args:
sp_model: a SentencePiece model.

Outputs:
@@ -157,7 +157,7 @@ def simple_space_split(iterator):
def numericalize_tokens_from_iterator(vocab, iterator, removed_tokens=None):
r"""Yield a list of ids from an token iterator with a vocab.

Arguments:
Args:
vocab: the vocabulary used to convert tokens into ids.
iterator: the iterator yielding lists of tokens.
removed_tokens: tokens removed from the output dataset (Default: None)
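A small end-to-end sketch tying these helpers together: train a SentencePiece model on a text file, load it back, and numericalize a couple of sentences. The "corpus.txt" path and the vocab size are assumptions for illustration.

```python
from torchtext.data.functional import (
    generate_sp_model,
    load_sp_model,
    sentencepiece_numericalizer,
)

# Trains a SentencePiece model and writes m_user.model / m_user.vocab to disk.
generate_sp_model("corpus.txt", vocab_size=2000, model_prefix="m_user")
sp_model = load_sp_model("m_user.model")

to_ids = sentencepiece_numericalizer(sp_model)
for ids in to_ids(["sentencepiece encode as pieces", "examples to try!"]):
    print(ids)  # each sentence becomes a list of token ids
```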
2 changes: 1 addition & 1 deletion torchtext/data/iterator.py
@@ -85,7 +85,7 @@ def __init__(self, dataset, batch_size, sort_key=None, device=None,
def splits(cls, datasets, batch_sizes=None, **kwargs):
"""Create Iterator objects for multiple splits of a dataset.

Arguments:
Args:
datasets: Tuple of Dataset objects corresponding to the splits. The
first such object should be the train set.
batch_sizes: Tuple of batch sizes to use for the different splits,
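A minimal sketch of Iterator.splits against the legacy API: one Iterator per split, with the train split passed first. The tiny in-memory dataset below only exists to keep the example self-contained; real code would pass Dataset splits built elsewhere, e.g. Iterator.splits((train, valid, test), batch_sizes=(32, 256, 256)).

```python
from torchtext.data import Dataset, Example, Field, Iterator

TEXT = Field(lower=True)
fields = [("text", TEXT)]
examples = [Example.fromlist([s], fields) for s in ("a tiny corpus", "of two lines")]
train_ds = Dataset(examples, fields)
TEXT.build_vocab(train_ds)

(train_iter,) = Iterator.splits((train_ds,), batch_sizes=(2,), device="cpu")
for batch in train_iter:
    print(batch.text.shape)  # (seq_len, batch_size)
    break
```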
4 changes: 2 additions & 2 deletions torchtext/data/metrics.py
@@ -7,7 +7,7 @@
def _compute_ngram_counter(tokens, max_n):
""" Create a Counter with a count of unique n-grams in the tokens list

Arguments:
Args:
tokens: a list of tokens (typically a string split on whitespaces)
max_n: the maximum order of n-gram wanted

@@ -36,7 +36,7 @@ def bleu_score(candidate_corpus, references_corpus, max_n=4, weights=[0.25] * 4)
"""Computes the BLEU score between a candidate translation corpus and a references
translation corpus. Based on https://www.aclweb.org/anthology/P02-1040.pdf

Arguments:
Args:
candidate_corpus: an iterable of candidate translations. Each translation is an
iterable of tokens
references_corpus: an iterable of iterables of reference translations. Each
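A minimal usage sketch for bleu_score with toy corpora: the candidate corpus is an iterable of token lists, and each candidate gets its own iterable of reference token lists.

```python
from torchtext.data.metrics import bleu_score

candidate_corpus = [["My", "full", "pytorch", "test"], ["Another", "Sentence"]]
references_corpus = [
    [["My", "full", "pytorch", "test"], ["Completely", "Different"]],  # references for candidate 1
    [["No", "Match"]],                                                 # references for candidate 2
]

print(bleu_score(candidate_corpus, references_corpus))  # ~0.84 with the default max_n=4
```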
10 changes: 5 additions & 5 deletions torchtext/data/pipeline.py
@@ -12,7 +12,7 @@ class Pipeline(object):
def __init__(self, convert_token=None):
"""Create a pipeline.

Arguments:
Args:
convert_token: The function to apply to input sequence data.
If None, the identity function is used. Default: None
"""
@@ -28,7 +28,7 @@ def __init__(self, convert_token=None):
def __call__(self, x, *args):
"""Apply the the current Pipeline(s) to an input.

Arguments:
Args:
x: The input to process with the Pipeline(s).
Positional arguments: Forwarded to the `call` function
of the Pipeline(s).
@@ -43,7 +43,7 @@ def call(self, x, *args):
applying the `convert_token` function to all input elements is
returned.

Arguments:
Args:
x: The input to apply the convert_token function to.
Positional arguments: Forwarded to the `convert_token` function
of the current Pipeline.
@@ -55,7 +55,7 @@ def add_before(self, pipeline):
def add_before(self, pipeline):
"""Add a Pipeline to be applied before this processing pipeline.

Arguments:
Args:
pipeline: The Pipeline or callable to apply before this
Pipeline.
"""
@@ -67,7 +67,7 @@ def add_after(self, pipeline):
def add_after(self, pipeline):
"""Add a Pipeline to be applied after this processing pipeline.

Arguments:
Args:
pipeline: The Pipeline or callable to apply after this
Pipeline.
"""
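A minimal sketch of composing Pipelines as these docstrings describe: a Pipeline applies its convert_token function to a single token, or element-wise to a list, and add_after chains another step onto it. The lower-casing and punctuation-stripping steps are arbitrary choices for illustration.

```python
from torchtext.data import Pipeline

lower = Pipeline(str.lower)                          # convert_token applied per token
lower.add_after(Pipeline(lambda s: s.strip(".,")))   # runs after the lower-casing step

print(lower("Hello,"))              # 'hello'
print(lower(["Hello,", "World."]))  # lists are handled element-wise: ['hello', 'world']
```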
4 changes: 2 additions & 2 deletions torchtext/data/utils.py
@@ -76,7 +76,7 @@ def get_tokenizer(tokenizer, language='en'):
r"""
Generate tokenizer function for a string sentence.

Arguments:
Args:
tokenizer: the name of tokenizer function. If None, it returns split()
function, which splits the string sentence by space.
If basic_english, it returns _basic_english_normalize() function,
@@ -205,7 +205,7 @@ def dtype_to_attr(dtype):
def ngrams_iterator(token_list, ngrams):
"""Return an iterator that yields the given tokens and their ngrams.

Arguments:
Args:
token_list: A list of tokens
ngrams: the number of ngrams.

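A minimal usage sketch for the two helpers documented above (the sentence is arbitrary): get_tokenizer("basic_english") returns a normalizing tokenizer, and ngrams_iterator yields the tokens followed by their higher-order ngrams.

```python
from torchtext.data.utils import get_tokenizer, ngrams_iterator

tokenizer = get_tokenizer("basic_english")  # lower-cases and splits out punctuation
tokens = tokenizer("You can now install TorchText using pip!")

print(list(ngrams_iterator(tokens, 2)))
# ['you', 'can', ..., 'you can', 'can now', ...]
```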
6 changes: 3 additions & 3 deletions torchtext/datasets/imdb.py
@@ -18,7 +18,7 @@ def sort_key(ex):
def __init__(self, path, text_field, label_field, **kwargs):
"""Create an IMDB dataset instance given a path and fields.

Arguments:
Args:
path: Path to the dataset's highest level directory
text_field: The field that will be used for text data.
label_field: The field that will be used for label data.
@@ -41,7 +41,7 @@ def splits(cls, text_field, label_field, root='.data',
train='train', test='test', **kwargs):
"""Create dataset objects for splits of the IMDB dataset.

Arguments:
Args:
text_field: The field that will be used for the sentence.
label_field: The field that will be used for label data.
root: Root dataset storage directory. Default is '.data'.
@@ -58,7 +58,7 @@ def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs):
def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs):
"""Create iterator objects for splits of the IMDB dataset.

Arguments:
Args:
batch_size: Batch_size
device: Device to create batches on. Use -1 for CPU and None for
the currently active GPU device.
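A minimal sketch of the legacy IMDB workflow these docstrings describe: build the train/test splits, then the vocabularies, then bucketed iterators. The field options, vocab size, and batch size are assumptions; the first call downloads the dataset into .data.

```python
from torchtext import data, datasets

TEXT = data.Field(lower=True)
LABEL = data.LabelField()

# splits() returns the train and test Dataset objects described above.
train_ds, test_ds = datasets.IMDB.splits(TEXT, LABEL)

TEXT.build_vocab(train_ds, max_size=25000)
LABEL.build_vocab(train_ds)

train_iter, test_iter = data.BucketIterator.splits(
    (train_ds, test_ds), batch_size=32, device="cpu")
```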