From 1ff4043c2fd40e90e71df738e585a5e81b687e7f Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Tue, 22 Dec 2020 23:31:45 +1100 Subject: [PATCH] [*.py] Rename "Arguments:" to "Args:" --- .../text_classification/iterable_train.py | 8 ++++---- examples/text_classification/model.py | 2 +- examples/text_classification/predict.py | 2 +- examples/text_classification/train.py | 4 ++-- torchtext/data/dataset.py | 14 ++++++------- torchtext/data/field.py | 16 +++++++-------- torchtext/data/functional.py | 10 +++++----- torchtext/data/iterator.py | 2 +- torchtext/data/metrics.py | 4 ++-- torchtext/data/pipeline.py | 10 +++++----- torchtext/data/utils.py | 4 ++-- torchtext/datasets/imdb.py | 6 +++--- torchtext/datasets/language_modeling.py | 14 ++++++------- torchtext/datasets/nli.py | 4 ++-- torchtext/datasets/sst.py | 6 +++--- torchtext/datasets/text_classification.py | 18 ++++++++--------- torchtext/datasets/translation.py | 10 +++++----- torchtext/datasets/trec.py | 6 +++--- torchtext/datasets/unsupervised_learning.py | 2 +- .../datasets/language_modeling.py | 10 +++++----- .../experimental/datasets/question_answer.py | 6 +++--- .../datasets/raw/language_modeling.py | 8 ++++---- .../datasets/raw/question_answer.py | 4 ++-- .../datasets/raw/sequence_tagging.py | 4 ++-- .../datasets/raw/text_classification.py | 18 ++++++++--------- .../experimental/datasets/raw/translation.py | 6 +++--- .../experimental/datasets/sequence_tagging.py | 6 +++--- .../datasets/text_classification.py | 20 +++++++++---------- .../experimental/datasets/translation.py | 8 ++++---- torchtext/experimental/transforms.py | 2 +- torchtext/experimental/vectors.py | 2 +- torchtext/experimental/vocab.py | 6 +++--- torchtext/utils.py | 6 +++--- torchtext/vocab.py | 14 ++++++------- 34 files changed, 131 insertions(+), 131 deletions(-) diff --git a/examples/text_classification/iterable_train.py b/examples/text_classification/iterable_train.py index d4e6507bc5..94b3c1c3a3 100644 --- a/examples/text_classification/iterable_train.py +++ b/examples/text_classification/iterable_train.py @@ -60,7 +60,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_): r""" Here we use SGD optimizer to train the model. - Arguments: + Args: lr_: learning rate num_epoch: the number of epoches for training the model train_data_: the data used to train the model @@ -108,7 +108,7 @@ def train_and_valid(lr_, num_epoch, train_data_, valid_data_): def test(data_): r""" - Arguments: + Args: data_: the data used to train the model """ data = DataLoader( @@ -137,7 +137,7 @@ def get_csv_iterator(data_path, ngrams, vocab, start=0, num_lines=None): Generate an iterator to read CSV file. The yield values are an integer for the label and a tensor for the text part. - Arguments: + Args: data_path: a path for the data file. ngrams: the number used for ngrams. vocab: a vocab object saving the string-to-index information @@ -171,7 +171,7 @@ class Dataset(torch.utils.data.IterableDataset): An iterable dataset to save the data. This dataset supports multi-processing to load the data. - Arguments: + Args: iterator: the iterator to read data. num_lines: the number of lines read by the individual iterator. 
""" diff --git a/examples/text_classification/model.py b/examples/text_classification/model.py index e96d1d0125..1314b1b55a 100644 --- a/examples/text_classification/model.py +++ b/examples/text_classification/model.py @@ -31,7 +31,7 @@ def init_weights(self): def forward(self, text, offsets): r""" - Arguments: + Args: text: 1-D tensor representing a bag of text tensors offsets: a list of offsets to delimit the 1-D text tensor into the individual sequences. diff --git a/examples/text_classification/predict.py b/examples/text_classification/predict.py index b11de131a9..4bdd1f7fce 100644 --- a/examples/text_classification/predict.py +++ b/examples/text_classification/predict.py @@ -11,7 +11,7 @@ def predict(text, model, dictionary, ngrams): The input text is numericalized with the vocab and then sent to the model for inference. - Arguments: + Args: text: a sample text string model: the trained model dictionary: a vocab object for the information of string-to-index diff --git a/examples/text_classification/train.py b/examples/text_classification/train.py index fbd0831832..84f20fb0d4 100644 --- a/examples/text_classification/train.py +++ b/examples/text_classification/train.py @@ -56,7 +56,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_): We use a SGD optimizer to train the model here and the learning rate decreases linearly with the progress of the training process. - Arguments: + Args: lr_: learning rate sub_train_: the data used to train the model sub_valid_: the data used for validation @@ -94,7 +94,7 @@ def train_and_valid(lr_, sub_train_, sub_valid_): def test(data_): r""" - Arguments: + Args: data_: the data used to train the model """ data = DataLoader(data_, batch_size=batch_size, collate_fn=generate_batch) diff --git a/torchtext/data/dataset.py b/torchtext/data/dataset.py index c9efe9168b..eecfc49d9c 100644 --- a/torchtext/data/dataset.py +++ b/torchtext/data/dataset.py @@ -29,7 +29,7 @@ class Dataset(torch.utils.data.Dataset): def __init__(self, examples, fields, filter_pred=None): """Create a dataset from a list of Examples and Fields. - Arguments: + Args: examples: List of Examples. fields (List(tuple(str, Field))): The Fields to use in this tuple. The string is a field name, and the Field is the associated field. @@ -55,7 +55,7 @@ def splits(cls, path=None, root='.data', train=None, validation=None, test=None, **kwargs): """Create Dataset objects for multiple splits of a dataset. - Arguments: + Args: path (str): Common prefix of the splits' file paths, or None to use the result of cls.download(root). root (str): Root dataset storage directory. Default is '.data'. @@ -87,7 +87,7 @@ def split(self, split_ratio=0.7, stratified=False, strata_field='label', random_state=None): """Create train-test(-valid?) splits from the instance's examples. - Arguments: + Args: split_ratio (float or List of floats): a number [0, 1] denoting the amount of data to be used for the training split (rest is used for test), or a list of numbers denoting the relative sizes of train, test and valid @@ -157,7 +157,7 @@ def __getattr__(self, attr): def download(cls, root, check=None): """Download and unzip an online archive (.zip, .gz, or .tgz). - Arguments: + Args: root (str): Folder to download data to. check (str or None): Folder whose existence indicates that the dataset has already been downloaded, or @@ -201,7 +201,7 @@ def download(cls, root, check=None): def filter_examples(self, field_names): """Remove unknown words from dataset examples with respect to given field. 
- Arguments: + Args: field_names (list(str)): Within example only the parts with field names in field_names will have their unknown words deleted. """ @@ -221,7 +221,7 @@ def __init__(self, path, format, fields, skip_header=False, csv_reader_params={}, **kwargs): """Create a TabularDataset given a path, file format, and field list. - Arguments: + Args: path (str): Path to the data file. format (str): The format of the data file. One of "CSV", "TSV", or "JSON" (case-insensitive). @@ -325,7 +325,7 @@ def stratify(examples, strata_field): def rationed_split(examples, train_ratio, test_ratio, val_ratio, rnd): """Create a random permutation of examples, then split them by ratios - Arguments: + Args: examples: a list of data train_ratio, test_ratio, val_ratio: split fractions. rnd: a random shuffler diff --git a/torchtext/data/field.py b/torchtext/data/field.py index e117b2edb7..95be1b85f2 100644 --- a/torchtext/data/field.py +++ b/torchtext/data/field.py @@ -274,7 +274,7 @@ def pad(self, minibatch): def build_vocab(self, *args, **kwargs): """Construct the Vocab object for this field from one or more datasets. - Arguments: + Args: Positional arguments: Dataset objects or other iterable data sources from which to construct the Vocab object that represents the set of possible values for this field. If @@ -311,7 +311,7 @@ def numericalize(self, arr, device=None): If the field has include_lengths=True, a tensor of lengths will be included in the return value. - Arguments: + Args: arr (List[List[str]], or tuple of (List[List[str]], List[int])): List of tokenized and padded examples, or tuple of List of tokenized and padded examples and List of lengths of each @@ -423,7 +423,7 @@ def __init__(self, **kwargs): def segment(self, *args): """Segment one or more datasets with this subword field. - Arguments: + Args: Positional arguments: Dataset objects or other indexable mutable sequences to segment. If a Dataset object is provided, all columns corresponding to this field are used; individual @@ -455,7 +455,7 @@ class NestedField(Field): primarily used to implement character embeddings. See ``tests/data/test_field.py`` for examples on how to use this field. - Arguments: + Args: nesting_field (Field): A field contained in this nested field. use_vocab (bool): Whether to use a Vocab object. If False, the data in this field should already be numerical. Default: ``True``. @@ -533,7 +533,7 @@ def preprocess(self, xs): the list is preprocessed using ``self.nesting_field.preprocess`` and the resulting list is returned. - Arguments: + Args: xs (list or str): The input to preprocess. Returns: @@ -576,7 +576,7 @@ def pad(self, minibatch): ['', '', '', '', '', '', ''], ['', '', '', '', '', '', '']]] - Arguments: + Args: minibatch (list): Each element is a list of string if ``self.nesting_field.sequential`` is ``False``, a list of list of string otherwise. @@ -646,7 +646,7 @@ def pad(self, minibatch): def build_vocab(self, *args, **kwargs): """Construct the Vocab object for nesting field and combine it with this field's vocab. - Arguments: + Args: Positional arguments: Dataset objects or other iterable data sources from which to construct the Vocab object that represents the set of possible values for the nesting field. If @@ -697,7 +697,7 @@ def numericalize(self, arrs, device=None): Each item in the minibatch will be numericalized independently and the resulting tensors will be stacked at the first dimension. - Arguments: + Args: arr (List[List[str]]): List of tokenized and padded examples. 
device (str or torch.device): A string or instance of `torch.device` specifying which device the Variables are going to be created on. diff --git a/torchtext/data/functional.py b/torchtext/data/functional.py index 6e20c8e667..995025e929 100644 --- a/torchtext/data/functional.py +++ b/torchtext/data/functional.py @@ -21,7 +21,7 @@ def generate_sp_model(filename, vocab_size=20000, model_prefix='m_user'): r"""Train a SentencePiece tokenizer. - Arguments: + Args: filename: the data file for training SentencePiece model. vocab_size: the size of vocabulary (Default: 20,000). model_type: the type of SentencePiece model, including unigram, @@ -42,7 +42,7 @@ def generate_sp_model(filename, vocab_size=20000, def load_sp_model(spm): r"""Load a sentencepiece model for file. - Arguments: + Args: spm: the file path or a file object saving the sentencepiece model. Outputs: @@ -70,7 +70,7 @@ def sentencepiece_numericalizer(sp_model): r"""A sentencepiece model to numericalize a text sentence into a generator over the ids. - Arguments: + Args: sp_model: a SentencePiece model. Outputs: @@ -96,7 +96,7 @@ def sentencepiece_tokenizer(sp_model): r"""A sentencepiece model to tokenize a text sentence into a generator over the tokens. - Arguments: + Args: sp_model: a SentencePiece model. Outputs: @@ -157,7 +157,7 @@ def simple_space_split(iterator): def numericalize_tokens_from_iterator(vocab, iterator, removed_tokens=None): r"""Yield a list of ids from an token iterator with a vocab. - Arguments: + Args: vocab: the vocabulary convert token into id. iterator: the iterator yield a list of tokens. removed_tokens: removed tokens from output dataset (Default: None) diff --git a/torchtext/data/iterator.py b/torchtext/data/iterator.py index 4c1119cb8c..3dfa807138 100644 --- a/torchtext/data/iterator.py +++ b/torchtext/data/iterator.py @@ -85,7 +85,7 @@ def __init__(self, dataset, batch_size, sort_key=None, device=None, def splits(cls, datasets, batch_sizes=None, **kwargs): """Create Iterator objects for multiple splits of a dataset. - Arguments: + Args: datasets: Tuple of Dataset objects corresponding to the splits. The first such object should be the train set. batch_sizes: Tuple of batch sizes to use for the different splits, diff --git a/torchtext/data/metrics.py b/torchtext/data/metrics.py index 63e07cdb21..c5c2983ee4 100644 --- a/torchtext/data/metrics.py +++ b/torchtext/data/metrics.py @@ -7,7 +7,7 @@ def _compute_ngram_counter(tokens, max_n): """ Create a Counter with a count of unique n-grams in the tokens list - Arguments: + Args: tokens: a list of tokens (typically a string split on whitespaces) max_n: the maximum order of n-gram wanted @@ -36,7 +36,7 @@ def bleu_score(candidate_corpus, references_corpus, max_n=4, weights=[0.25] * 4) """Computes the BLEU score between a candidate translation corpus and a references translation corpus. Based on https://www.aclweb.org/anthology/P02-1040.pdf - Arguments: + Args: candidate_corpus: an iterable of candidate translations. Each translation is an iterable of tokens references_corpus: an iterable of iterables of reference translations. Each diff --git a/torchtext/data/pipeline.py b/torchtext/data/pipeline.py index f576fdc720..d72ef5ef4c 100644 --- a/torchtext/data/pipeline.py +++ b/torchtext/data/pipeline.py @@ -12,7 +12,7 @@ class Pipeline(object): def __init__(self, convert_token=None): """Create a pipeline. - Arguments: + Args: convert_token: The function to apply to input sequence data. If None, the identity function is used. 
Default: None """ @@ -28,7 +28,7 @@ def __init__(self, convert_token=None): def __call__(self, x, *args): """Apply the the current Pipeline(s) to an input. - Arguments: + Args: x: The input to process with the Pipeline(s). Positional arguments: Forwarded to the `call` function of the Pipeline(s). @@ -43,7 +43,7 @@ def call(self, x, *args): applying the `convert_token` function to all input elements is returned. - Arguments: + Args: x: The input to apply the convert_token function to. Positional arguments: Forwarded to the `convert_token` function of the current Pipeline. @@ -55,7 +55,7 @@ def call(self, x, *args): def add_before(self, pipeline): """Add a Pipeline to be applied before this processing pipeline. - Arguments: + Args: pipeline: The Pipeline or callable to apply before this Pipeline. """ @@ -67,7 +67,7 @@ def add_before(self, pipeline): def add_after(self, pipeline): """Add a Pipeline to be applied after this processing pipeline. - Arguments: + Args: pipeline: The Pipeline or callable to apply after this Pipeline. """ diff --git a/torchtext/data/utils.py b/torchtext/data/utils.py index 5ecfad1958..045c2646fb 100644 --- a/torchtext/data/utils.py +++ b/torchtext/data/utils.py @@ -76,7 +76,7 @@ def get_tokenizer(tokenizer, language='en'): r""" Generate tokenizer function for a string sentence. - Arguments: + Args: tokenizer: the name of tokenizer function. If None, it returns split() function, which splits the string sentence by space. If basic_english, it returns _basic_english_normalize() function, @@ -205,7 +205,7 @@ def dtype_to_attr(dtype): def ngrams_iterator(token_list, ngrams): """Return an iterator that yields the given tokens and their ngrams. - Arguments: + Args: token_list: A list of tokens ngrams: the number of ngrams. diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py index 38fccb97be..e59ce19ecb 100644 --- a/torchtext/datasets/imdb.py +++ b/torchtext/datasets/imdb.py @@ -18,7 +18,7 @@ def sort_key(ex): def __init__(self, path, text_field, label_field, **kwargs): """Create an IMDB dataset instance given a path and fields. - Arguments: + Args: path: Path to the dataset's highest level directory text_field: The field that will be used for text data. label_field: The field that will be used for label data. @@ -41,7 +41,7 @@ def splits(cls, text_field, label_field, root='.data', train='train', test='test', **kwargs): """Create dataset objects for splits of the IMDB dataset. - Arguments: + Args: text_field: The field that will be used for the sentence. label_field: The field that will be used for label data. root: Root dataset storage directory. Default is '.data'. @@ -58,7 +58,7 @@ def splits(cls, text_field, label_field, root='.data', def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs): """Create iterator objects for splits of the IMDB dataset. - Arguments: + Args: batch_size: Batch_size device: Device to create batches on. Use - 1 for CPU and None for the currently active GPU device. diff --git a/torchtext/datasets/language_modeling.py b/torchtext/datasets/language_modeling.py index 0002aabc04..7ebcca71b1 100644 --- a/torchtext/datasets/language_modeling.py +++ b/torchtext/datasets/language_modeling.py @@ -9,7 +9,7 @@ def __init__(self, path, text_field, newline_eos=True, encoding='utf-8', **kwargs): """Create a LanguageModelingDataset given a path and a field. - Arguments: + Args: path: Path to the data file. text_field: The field that will be used for text data. 
newline_eos: Whether to add an token for every newline in the @@ -44,7 +44,7 @@ def splits(cls, text_field, root='.data', train='wiki.train.tokens', This is the most flexible way to use the dataset. - Arguments: + Args: text_field: The field that will be used for text data. root: The root directory that the dataset's zip archive will be expanded into; therefore the directory in whose wikitext-2 @@ -67,7 +67,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. bptt_len: Length of sequences for backpropagation through time. device: Device to create batches on. Use -1 for CPU and None for @@ -105,7 +105,7 @@ def splits(cls, text_field, root='.data', train='wiki.train.tokens', This is the most flexible way to use the dataset. - Arguments: + Args: text_field: The field that will be used for text data. root: The root directory that the dataset's zip archive will be expanded into; therefore the directory in whose wikitext-103 @@ -128,7 +128,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. bptt_len: Length of sequences for backpropagation through time. device: Device to create batches on. Use -1 for CPU and None for @@ -174,7 +174,7 @@ def splits(cls, text_field, root='.data', train='ptb.train.txt', **kwargs): """Create dataset objects for splits of the Penn Treebank dataset. - Arguments: + Args: text_field: The field that will be used for text data. root: The root directory where the data files will be stored. train: The filename of the train data. Default: 'ptb.train.txt'. @@ -195,7 +195,7 @@ def iters(cls, batch_size=32, bptt_len=35, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. bptt_len: Length of sequences for backpropagation through time. device: Device to create batches on. Use -1 for CPU and None for diff --git a/torchtext/datasets/nli.py b/torchtext/datasets/nli.py index 9b3d758b06..758576e7cc 100644 --- a/torchtext/datasets/nli.py +++ b/torchtext/datasets/nli.py @@ -51,7 +51,7 @@ def splits(cls, text_field, label_field, parse_field=None, This is the most flexible way to use the dataset. - Arguments: + Args: text_field: The field that will be used for premise and hypothesis data. label_field: The field that will be used for label data. @@ -96,7 +96,7 @@ def iters(cls, batch_size=32, device=0, root='.data', This is the simplest way to use the dataset, and assumes common defaults for field, vocabulary, and iterator parameters. - Arguments: + Args: batch_size: Batch size. device: Device to create batches on. Use -1 for CPU and None for the currently active GPU device. diff --git a/torchtext/datasets/sst.py b/torchtext/datasets/sst.py index 95a04e3f6b..8c793fad93 100644 --- a/torchtext/datasets/sst.py +++ b/torchtext/datasets/sst.py @@ -17,7 +17,7 @@ def __init__(self, path, text_field, label_field, subtrees=False, fine_grained=False, **kwargs): """Create an SST dataset instance given a path and fields. - Arguments: + Args: path: Path to the data file text_field: The field that will be used for text data. label_field: The field that will be used for label data. 
@@ -49,7 +49,7 @@ def splits(cls, text_field, label_field, root='.data', train_subtrees=False, **kwargs): """Create dataset objects for splits of the SST dataset. - Arguments: + Args: text_field: The field that will be used for the sentence. label_field: The field that will be used for label data. root: The root directory that the dataset's zip archive will be @@ -81,7 +81,7 @@ def splits(cls, text_field, label_field, root='.data', def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs): """Create iterator objects for splits of the SST dataset. - Arguments: + Args: batch_size: Batch_size device: Device to create batches on. Use - 1 for CPU and None for the currently active GPU device. diff --git a/torchtext/datasets/text_classification.py b/torchtext/datasets/text_classification.py index be7400f91b..fc08876a34 100644 --- a/torchtext/datasets/text_classification.py +++ b/torchtext/datasets/text_classification.py @@ -78,7 +78,7 @@ class TextClassificationDataset(torch.utils.data.Dataset): def __init__(self, vocab, data, labels): """Initiate text-classification dataset. - Arguments: + Args: vocab: Vocabulary object used for dataset. data: a list of label/tokens tuple. tokens are a tensor after numericalizing the string tokens. label is an integer. @@ -154,7 +154,7 @@ def AG_NEWS(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -183,7 +183,7 @@ def SogouNews(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -221,7 +221,7 @@ def DBpedia(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -247,7 +247,7 @@ def YelpReviewPolarity(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -272,7 +272,7 @@ def YelpReviewFull(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -306,7 +306,7 @@ def YahooAnswers(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -332,7 +332,7 @@ def AmazonReviewPolarity(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -357,7 +357,7 @@ def AmazonReviewFull(*args, **kwargs): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the dataset are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. 
Default: 1 diff --git a/torchtext/datasets/translation.py b/torchtext/datasets/translation.py index 058022999f..cbb7ebdb39 100644 --- a/torchtext/datasets/translation.py +++ b/torchtext/datasets/translation.py @@ -17,7 +17,7 @@ def sort_key(ex): def __init__(self, path, exts, fields, **kwargs): """Create a TranslationDataset given paths and fields. - Arguments: + Args: path: Common prefix of paths to the data files for both languages. exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data @@ -46,7 +46,7 @@ def splits(cls, exts, fields, path=None, root='.data', train='train', validation='val', test='test', **kwargs): """Create dataset objects for splits of a TranslationDataset. - Arguments: + Args: exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data in each language. @@ -87,7 +87,7 @@ def splits(cls, exts, fields, root='.data', train='train', validation='val', test='test2016', **kwargs): """Create dataset objects for splits of the Multi30k dataset. - Arguments: + Args: exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data in each language. @@ -127,7 +127,7 @@ def splits(cls, exts, fields, root='.data', test='IWSLT16.TED.tst2014', **kwargs): """Create dataset objects for splits of the IWSLT dataset. - Arguments: + Args: exts: A tuple containing the extension to path for each language. fields: A tuple containing the fields that will be used for data in each language. @@ -201,7 +201,7 @@ def splits(cls, exts, fields, root='.data', test='newstest2014.tok.bpe.32000', **kwargs): """Create dataset objects for splits of the WMT 2014 dataset. - Arguments: + Args: exts: A tuple containing the extensions for each language. Must be either ('.en', '.de') or the reverse. fields: A tuple containing the fields that will be used for data diff --git a/torchtext/datasets/trec.py b/torchtext/datasets/trec.py index d96723c672..6e8792b519 100644 --- a/torchtext/datasets/trec.py +++ b/torchtext/datasets/trec.py @@ -18,7 +18,7 @@ def __init__(self, path, text_field, label_field, fine_grained=False, **kwargs): """Create an TREC dataset instance given a path and fields. - Arguments: + Args: path: Path to the data file. text_field: The field that will be used for text data. label_field: The field that will be used for label data. @@ -46,7 +46,7 @@ def splits(cls, text_field, label_field, root='.data', train='train_5500.label', test='TREC_10.label', **kwargs): """Create dataset objects for splits of the TREC dataset. - Arguments: + Args: text_field: The field that will be used for the sentence. label_field: The field that will be used for label data. root: Root dataset storage directory. Default is '.data'. @@ -64,7 +64,7 @@ def splits(cls, text_field, label_field, root='.data', def iters(cls, batch_size=32, device=0, root='.data', vectors=None, **kwargs): """Create iterator objects for splits of the TREC dataset. - Arguments: + Args: batch_size: Batch_size device: Device to create batches on. Use - 1 for CPU and None for the currently active GPU device. 
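[Editor's note] The hunks above only touch docstring headers, so as a quick sanity check of the documented API, here is a minimal usage sketch of the legacy text-classification loaders from torchtext/datasets/text_classification.py. The root/ngrams keywords come from the docstrings above; the get_vocab()/get_labels() accessors are assumptions based on the 0.8-era TextClassificationDataset and may differ between releases.

    # Usage sketch only: exercises AG_NEWS with the root/ngrams keywords documented
    # above. get_vocab()/get_labels() are assumed accessors of the 0.8-era
    # TextClassificationDataset and are not part of this patch.
    from torchtext.datasets.text_classification import AG_NEWS

    train_dataset, test_dataset = AG_NEWS(root='.data', ngrams=2)

    vocab = train_dataset.get_vocab()        # vocabulary built from the training split
    labels = train_dataset.get_labels()      # set of integer class labels
    print(len(vocab), len(labels))
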
diff --git a/torchtext/datasets/unsupervised_learning.py b/torchtext/datasets/unsupervised_learning.py index f1a97459b4..1babf1bfca 100644 --- a/torchtext/datasets/unsupervised_learning.py +++ b/torchtext/datasets/unsupervised_learning.py @@ -85,7 +85,7 @@ class EnWik9(torch.utils.data.Dataset): def __init__(self, begin_line=0, num_lines=6348957, root='.data'): """Initiate EnWik9 dataset. - Arguments: + Args: begin_line: the number of beginning line. Default: 0 num_lines: the number of lines to be loaded. Default: 6348957 root: Directory where the datasets are saved. Default: ".data" diff --git a/torchtext/experimental/datasets/language_modeling.py b/torchtext/experimental/datasets/language_modeling.py index 777f04d93c..3350c3f3b0 100644 --- a/torchtext/experimental/datasets/language_modeling.py +++ b/torchtext/experimental/datasets/language_modeling.py @@ -30,7 +30,7 @@ class LanguageModelingDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transform): """Initiate language modeling dataset. - Arguments: + Args: data: a tensor of tokens. tokens are ids after numericalizing the string tokens. torch.tensor([token_id_1, token_id_2, token_id_3, token_id1]).long() @@ -94,7 +94,7 @@ def WikiText2(tokenizer=None, root='.data', vocab=None, data_select=('train', 'v Create language modeling dataset: WikiText2 Separately returns the train/test/valid set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable @@ -128,7 +128,7 @@ def WikiText103(tokenizer=None, root='.data', vocab=None, data_select=('train', Create language modeling dataset: WikiText103 Separately returns the train/test/valid set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable @@ -163,7 +163,7 @@ def PennTreebank(tokenizer=None, root='.data', vocab=None, data_select=('train', Create language modeling dataset: PennTreebank Separately returns the train/test/valid set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable @@ -198,7 +198,7 @@ def WMTNewsCrawl(tokenizer=None, root='.data', vocab=None, data_select=('train') Create language modeling dataset: WMTNewsCrawl returns the train set - Arguments: + Args: tokenizer: the tokenizer used to preprocess raw text data. The default one is basic_english tokenizer in fastText. spacy tokenizer is supported as well (see example below). A custom tokenizer is callable diff --git a/torchtext/experimental/datasets/question_answer.py b/torchtext/experimental/datasets/question_answer.py index fb3b390dd8..ec239deb97 100644 --- a/torchtext/experimental/datasets/question_answer.py +++ b/torchtext/experimental/datasets/question_answer.py @@ -24,7 +24,7 @@ class QuestionAnswerDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transforms): """Initiate question answer dataset. - Arguments: + Args: data: a tuple of (context, question, answers, ans_pos). vocab: Vocabulary object used for dataset. transforms: a dictionary of transforms. 
@@ -96,7 +96,7 @@ def SQuAD1(root='.data', vocab=None, tokenizer=None, data_select=('train', 'dev' Separately returns the train and dev dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocab: Vocabulary used for dataset. If None, it will generate a new vocabulary based on the train data set. @@ -130,7 +130,7 @@ def SQuAD2(root='.data', vocab=None, tokenizer=None, data_select=('train', 'dev' Separately returns the train and dev dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocab: Vocabulary used for dataset. If None, it will generate a new vocabulary based on the train data set. diff --git a/torchtext/experimental/datasets/raw/language_modeling.py b/torchtext/experimental/datasets/raw/language_modeling.py index e369c4ebb3..50f3f82a80 100644 --- a/torchtext/experimental/datasets/raw/language_modeling.py +++ b/torchtext/experimental/datasets/raw/language_modeling.py @@ -63,7 +63,7 @@ def WikiText2(root='.data', data_select=('train', 'valid', 'test')): Create language modeling dataset: WikiText2 Separately returns the train/test/valid set - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tupel for the returned datasets. Default: ('train', 'valid, 'test') By default, all the three datasets (train, test, valid) are generated. Users @@ -88,7 +88,7 @@ def WikiText103(root='.data', data_select=('train', 'valid', 'test')): Create language modeling dataset: WikiText103 Separately returns the train/test/valid set - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: the returned datasets. Default: ('train', 'valid','test') By default, all the three datasets (train, test, valid) are generated. Users @@ -111,7 +111,7 @@ def PennTreebank(root='.data', data_select=('train', 'valid', 'test')): Create language modeling dataset: PennTreebank Separately returns the train/test/valid set - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'test','valid')) @@ -136,7 +136,7 @@ def WMTNewsCrawl(root='.data', data_select=('train'), year=2010, language='en'): Create language modeling dataset: WMTNewsCrawl - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. (Default: 'train') diff --git a/torchtext/experimental/datasets/raw/question_answer.py b/torchtext/experimental/datasets/raw/question_answer.py index 71b63df5d5..d21dbdbc55 100644 --- a/torchtext/experimental/datasets/raw/question_answer.py +++ b/torchtext/experimental/datasets/raw/question_answer.py @@ -46,7 +46,7 @@ def SQuAD1(root='.data', data_select=('train', 'dev')): ['Saint Bernadette Soubirous'], [515]) - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'dev')) By default, both datasets (train, dev) are generated. Users could also choose any one or two of them, @@ -70,7 +70,7 @@ def SQuAD2(root='.data', data_select=('train', 'dev')): ['in the late 1990s'], [269]) - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'dev')) By default, both datasets (train, dev) are generated. 
Users could also choose any one or two of them, diff --git a/torchtext/experimental/datasets/raw/sequence_tagging.py b/torchtext/experimental/datasets/raw/sequence_tagging.py index b584d147ab..c1a67261f1 100644 --- a/torchtext/experimental/datasets/raw/sequence_tagging.py +++ b/torchtext/experimental/datasets/raw/sequence_tagging.py @@ -69,7 +69,7 @@ def UDPOS(root=".data", data_select=('train', 'valid', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'valid', 'test')) By default, all the datasets (train, valid, test) are generated. @@ -88,7 +88,7 @@ def CoNLL2000Chunking(root=".data", data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets (Default: ('train', 'test')) By default, both datasets (train, test) are generated. Users could also choose any one or two of them, diff --git a/torchtext/experimental/datasets/raw/text_classification.py b/torchtext/experimental/datasets/raw/text_classification.py index 46ee16b982..694e4d5d29 100644 --- a/torchtext/experimental/datasets/raw/text_classification.py +++ b/torchtext/experimental/datasets/raw/text_classification.py @@ -61,7 +61,7 @@ def AG_NEWS(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -81,7 +81,7 @@ def SogouNews(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -101,7 +101,7 @@ def DBpedia(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -121,7 +121,7 @@ def YelpReviewPolarity(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -141,7 +141,7 @@ def YelpReviewFull(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. 
Users could also choose any one or two of them, @@ -161,7 +161,7 @@ def YahooAnswers(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -181,7 +181,7 @@ def AmazonReviewPolarity(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -201,7 +201,7 @@ def AmazonReviewFull(root='.data', data_select=('train', 'test')): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, @@ -231,7 +231,7 @@ def IMDB(root='.data', data_select=('train', 'test')): Separately returns the raw training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" data_select: a string or tuple for the returned datasets. Default: ('train', 'test') By default, both datasets (train, test) are generated. Users could also choose any one or two of them, diff --git a/torchtext/experimental/datasets/raw/translation.py b/torchtext/experimental/datasets/raw/translation.py index c36f9abd87..3b74c58421 100644 --- a/torchtext/experimental/datasets/raw/translation.py +++ b/torchtext/experimental/datasets/raw/translation.py @@ -234,7 +234,7 @@ def Multi30k(train_filenames=("train.de", "train.en"), val.5.de val.5.en - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de', 'train.en') valid_filenames: the source and target filenames for valid. @@ -400,7 +400,7 @@ def IWSLT(train_filenames=('train.de-en.de', 'train.de-en.en'), train.tags.fr-en.en train.tags.fr-en.fr - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de-en.de', 'train.de-en.en') valid_filenames: the source and target filenames for valid. @@ -486,7 +486,7 @@ def WMT14(train_filenames=('train.tok.clean.bpe.32000.de', newstest2015.tok.bpe.32000.de train.tok.clean.bpe.32000.de - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.tok.clean.bpe.32000.de', 'train.tok.clean.bpe.32000.en') valid_filenames: the source and target filenames for valid. diff --git a/torchtext/experimental/datasets/sequence_tagging.py b/torchtext/experimental/datasets/sequence_tagging.py index 3c0448e292..26b52ab206 100644 --- a/torchtext/experimental/datasets/sequence_tagging.py +++ b/torchtext/experimental/datasets/sequence_tagging.py @@ -74,7 +74,7 @@ class SequenceTaggingDataset(torch.utils.data.Dataset): def __init__(self, data, vocabs, transforms): """Initiate sequence tagging dataset. - Arguments: + Args: data: a list of word and its respective tags. Example: [[word, POS, dep_parsing label, ...]] vocabs: a list of vocabularies for its respective tags. 
@@ -113,7 +113,7 @@ def UDPOS(root=".data", vocabs=None, data_select=("train", "valid", "test")): Separately returns the training, validation, and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocabs: A list of voabularies for each columns in the dataset. Must be in an instance of List @@ -139,7 +139,7 @@ def CoNLL2000Chunking(root=".data", vocabs=None, data_select=("train", "test")): Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" vocabs: A list of voabularies for each columns in the dataset. Must be in an instance of List diff --git a/torchtext/experimental/datasets/text_classification.py b/torchtext/experimental/datasets/text_classification.py index 1ba9819f9b..0646e8d63a 100644 --- a/torchtext/experimental/datasets/text_classification.py +++ b/torchtext/experimental/datasets/text_classification.py @@ -38,7 +38,7 @@ class TextClassificationDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transforms): """Initiate text-classification dataset. - Arguments: + Args: data: a list of label and text tring tuple. label is an integer. [(label1, text1), (label2, text2), (label2, text3)] vocab: Vocabulary object used for dataset. @@ -113,7 +113,7 @@ def AG_NEWS(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('tr Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -157,7 +157,7 @@ def SogouNews(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=(' Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -210,7 +210,7 @@ def DBpedia(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('tr Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -251,7 +251,7 @@ def YelpReviewPolarity(root='.data', ngrams=1, vocab=None, tokenizer=None, data_ Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -291,7 +291,7 @@ def YelpReviewFull(root='.data', ngrams=1, vocab=None, tokenizer=None, data_sele Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -340,7 +340,7 @@ def YahooAnswers(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -381,7 +381,7 @@ def AmazonReviewPolarity(root='.data', ngrams=1, vocab=None, tokenizer=None, dat Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. 
Default: 1 @@ -421,7 +421,7 @@ def AmazonReviewFull(root='.data', ngrams=1, vocab=None, tokenizer=None, data_se Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 @@ -462,7 +462,7 @@ def IMDB(root='.data', ngrams=1, vocab=None, tokenizer=None, data_select=('train Separately returns the training and test dataset - Arguments: + Args: root: Directory where the datasets are saved. Default: ".data" ngrams: a contiguous sequence of n items from s string text. Default: 1 diff --git a/torchtext/experimental/datasets/translation.py b/torchtext/experimental/datasets/translation.py index abd626d2ec..c38d17401d 100644 --- a/torchtext/experimental/datasets/translation.py +++ b/torchtext/experimental/datasets/translation.py @@ -95,7 +95,7 @@ class TranslationDataset(torch.utils.data.Dataset): def __init__(self, data, vocab, transforms): """Initiate translation dataset. - Arguments: + Args: data: a tuple of source and target tensors, which include token ids numericalizing the string tokens. [(src_tensor0, tgt_tensor0), (src_tensor1, tgt_tensor1)] @@ -143,7 +143,7 @@ def Multi30k(train_filenames=("train.de", "train.en"), """ Define translation datasets: Multi30k Separately returns train/valid/test datasets as a tuple - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de', 'train.en') valid_filenames: the source and target filenames for valid. @@ -245,7 +245,7 @@ def IWSLT(train_filenames=('train.de-en.de', 'train.de-en.en'), Separately returns train/valid/test datasets The available datasets include: - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.de-en.de', 'train.de-en.en') valid_filenames: the source and target filenames for valid. @@ -486,7 +486,7 @@ def WMT14(train_filenames=('train.tok.clean.bpe.32000.de', newstest2015.tok.bpe.32000.de train.tok.clean.bpe.32000.de - Arguments: + Args: train_filenames: the source and target filenames for training. Default: ('train.tok.clean.bpe.32000.de', 'train.tok.clean.bpe.32000.en') valid_filenames: the source and target filenames for valid. diff --git a/torchtext/experimental/transforms.py b/torchtext/experimental/transforms.py index 6c3896aa61..1f62ea7032 100644 --- a/torchtext/experimental/transforms.py +++ b/torchtext/experimental/transforms.py @@ -209,7 +209,7 @@ def to_ivalue(self): def load_sp_model(sp_model): r"""Load a sentencepiece model for file. - Arguments: + Args: sp_model: the file path or a file object saving the sentencepiece model. Outputs: diff --git a/torchtext/experimental/vectors.py b/torchtext/experimental/vectors.py index a606e12bcc..72bae2351b 100644 --- a/torchtext/experimental/vectors.py +++ b/torchtext/experimental/vectors.py @@ -185,7 +185,7 @@ def load_vectors_from_file_path(filepath, delimiter=",", unk_tensor=None, num_cp def build_vectors(tokens, vectors, unk_tensor=None): r"""Factory method for creating a vectors object which maps tokens to vectors. - Arguments: + Args: tokens (List[str]): a list of tokens. vectors (torch.Tensor): a 2d tensor representing the vector associated with each token. unk_tensor (torch.Tensor): a 1d tensors representing the vector associated with an unknown token. 
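[Editor's note] Before moving on to torchtext/experimental/vocab.py, a short hedged sketch of the build_vectors factory whose docstring is edited just above (torchtext/experimental/vectors.py). The argument names follow that docstring; indexing the returned object by token is an assumption and may differ between releases.

    # Sketch of build_vectors as documented above: tokens is a list of strings,
    # vectors a 2-D tensor with one row per token, unk_tensor a 1-D fallback vector.
    # Indexing the returned object by token is an assumption, not part of this patch.
    import torch
    from torchtext.experimental.vectors import build_vectors

    tokens = ['hello', 'world']
    weights = torch.tensor([[1.0, 0.0],
                            [0.0, 1.0]])     # one vector per token
    unk = torch.zeros(2)                     # returned for out-of-vocabulary tokens

    vecs = build_vectors(tokens, weights, unk_tensor=unk)
    print(vecs['hello'])                     # assumed lookup -> tensor([1., 0.])
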
diff --git a/torchtext/experimental/vocab.py b/torchtext/experimental/vocab.py index a7707003ee..6883326938 100644 --- a/torchtext/experimental/vocab.py +++ b/torchtext/experimental/vocab.py @@ -85,7 +85,7 @@ def build_vocab_from_iterator(iterator, min_freq=1, unk_token=''): """ Build a Vocab from an iterator. - Arguments: + Args: iterator: Iterator used to build Vocab. Must yield list or iterator of tokens. min_freq: The minimum frequency needed to include a token in the vocabulary. Values less than 1 will be set to 1. Default: 1. @@ -108,7 +108,7 @@ def vocab(ordered_dict, min_freq=1, unk_token=''): Therefore if sorting by token frequency is important to the user, the `ordered_dict` should be created in a way to reflect this. Additionally, the if the `unk_token` isn't found inside of the `ordered_dict`, it will be added to the end of the vocab. - Arguments: + Args: ordered_dict (collections.OrderedDict): object holding the frequencies of each token found in the data. min_freq: The minimum frequency needed to include a token in the vocabulary. Values less than 1 will be set to 1. Default: 1. @@ -147,7 +147,7 @@ class Vocab(nn.Module): __jit_unused_properties__ = ["is_jitable"] r"""Creates a vocab object which maps tokens to indices. - Arguments: + Args: vocab (torch.classes.torchtext.Vocab or torchtext._torchtext.Vocab): a cpp vocab object. """ diff --git a/torchtext/utils.py b/torchtext/utils.py index 1e4974df21..d5a749487f 100644 --- a/torchtext/utils.py +++ b/torchtext/utils.py @@ -38,7 +38,7 @@ def download_from_url(url, path=None, root='.data', overwrite=False, hash_value= """Download file, with logic (from tensor2tensor) for Google Drive. Returns the path to the downloaded file. - Arguments: + Args: url: the url of the file from URL header. (None) root: download folder used to store the file in (.data) overwrite: overwrite existing files (False) @@ -136,7 +136,7 @@ def unicode_csv_reader(unicode_csv_data, **kwargs): Borrowed and slightly modified from the Python docs: https://docs.python.org/2/library/csv.html#csv-examples - Arguments: + Args: unicode_csv_data: unicode csv data (see example below) Examples: @@ -171,7 +171,7 @@ def utf_8_encoder(unicode_csv_data): def extract_archive(from_path, to_path=None, overwrite=False): """Extract archive. - Arguments: + Args: from_path: the path of the archive. to_path: the root path of the extracted files (directory of from_path) overwrite: overwrite existing files (False) diff --git a/torchtext/vocab.py b/torchtext/vocab.py index fe5101d16d..516f158a16 100755 --- a/torchtext/vocab.py +++ b/torchtext/vocab.py @@ -35,7 +35,7 @@ def __init__(self, counter, max_size=None, min_freq=1, specials=('', ' vectors=None, unk_init=None, vectors_cache=None, specials_first=True): """Create a Vocab object from a collections.Counter. - Arguments: + Args: counter: collections.Counter object holding the frequencies of each value found in the data. max_size: The maximum size of the vocabulary, or None for no @@ -149,7 +149,7 @@ def extend(self, v, sort=False): def load_vectors(self, vectors, **kwargs): """ - Arguments: + Args: vectors: one of or a list containing instantiations of the GloVe, CharNGram, or Vectors classes. Alternatively, one of or a list of available pretrained vectors: @@ -201,7 +201,7 @@ def set_vectors(self, stoi, vectors, dim, unk_init=torch.Tensor.zero_): """ Set the vectors for the Vocab instance from a collection of Tensors. 
- Arguments: + Args: stoi: A dictionary of string to the index of the associated vector in the `vectors` input argument. vectors: An indexed iterable (or other structure supporting __getitem__) that @@ -228,7 +228,7 @@ def __init__(self, counter, max_size=None, specials=(''), vectors=None, unk_init=torch.Tensor.zero_): """Create a revtok subword vocabulary from a collections.Counter. - Arguments: + Args: counter: collections.Counter object holding the frequencies of each word found in the data. max_size: The maximum size of the subword vocabulary, or None for no @@ -301,7 +301,7 @@ class Vectors(object): def __init__(self, name, cache=None, url=None, unk_init=None, max_vectors=None): """ - Arguments: + Args: name: name of the file that contains the vectors cache: directory for cached vectors @@ -440,7 +440,7 @@ def __len__(self): def get_vecs_by_tokens(self, tokens, lower_case_backup=False): """Look up embedding vectors of tokens. - Arguments: + Args: tokens: a token or a list of tokens. if `tokens` is a string, returns a 1-D tensor of shape `self.dim`; if `tokens` is a list of strings, returns a 2-D tensor of shape=(len(tokens), @@ -549,7 +549,7 @@ def build_vocab_from_iterator(iterator, num_lines=None): """ Build a Vocab from an iterator. - Arguments: + Args: iterator: Iterator used to build Vocab. Must yield list or iterator of tokens. num_lines: The expected number of elements returned by the iterator. (Default: None)
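[Editor's note] Taken together, the patch is purely mechanical: every Google-style docstring section header changes from "Arguments:" to "Args:" while the parameter descriptions stay untouched. For reference, a minimal sketch of the resulting docstring shape, mirroring predict() from examples/text_classification/predict.py (the body is elided and the ngrams description is borrowed from a sibling docstring in iterable_train.py):

    def predict(text, model, dictionary, ngrams):
        r"""Numericalize the input text with the vocab and run the trained model.

        Args:
            text: a sample text string
            model: the trained model
            dictionary: a vocab object for the information of string-to-index
            ngrams: the number used for ngrams
        """
        ...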