# The repackage_hidden(h) function is designed to detach the hidden states from their history in a
# Recurrent Neural Network (RNN) or any of its variants like LSTM or GRU. This is necessary when
# training RNNs to prevent the backpropagation through time (BPTT)
# from going back to the very start of the sequence, which can lead to computational inefficiency
# and the vanishing or exploding gradient problem.
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
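    # The body of the function is elided from this excerpt. In the upstream PyTorch
    # word_language_model example it is a small recursive detach, roughly (a sketch,
    # not the exact lines of this diff):
    #
    #     if isinstance(h, torch.Tensor):
    #         return h.detach()
    #     else:
    #         return tuple(repackage_hidden(v) for v in h)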
@@ -136,14 +142,47 @@ def repackage_hidden(h):
# done along the batch dimension (i.e. dimension 1), since that was handled
# by the batchify function. The chunks are along dimension 0, corresponding
# to the seq_len dimension in the LSTM.

# The get_batch function and BPTT (Backpropagation Through Time) work together to train RNNs on sequential data.

# BPTT:
# - BPTT is a technique for training RNNs where we unroll the network through time and apply backpropagation.
# - It allows the model to learn from sequences of data by considering both current and past inputs in its predictions.

# get_batch Function:
# - This function prepares data for training by subdividing the source data into manageable chunks based on the bptt parameter.
# - The bptt parameter represents the sequence length for each chunk, essentially defining how far back in time the model should learn dependencies.
# - The example with a bptt-limit of 2 creates two variables, each containing a segment of the sequence to be processed by the RNN.

# Relationship:
# - The chunks created by get_batch are fed into the RNN model sequentially. Each chunk covers bptt timesteps of the unrolled RNN for the BPTT process.
# - During the forward pass, the RNN processes these chunks, maintaining hidden states that carry information from previous chunks (previous timesteps).
# - In the backward pass, gradients are computed and propagated back through these unrolled timesteps, allowing the model to learn from errors at each timestep.
# - The subdivision of data into chunks along dimension 0 (seq_len) and not along the batch dimension is crucial.
# - It ensures that dependencies across timesteps (within each chunk) are preserved and learned, aligning with the sequential nature of RNNs and the essence of BPTT.
# - By training on these chunks, the model learns to predict the next element in the sequence, considering the specified sequence length (bptt), which helps in capturing short-term dependencies within that range.

# In summary, get_batch prepares data in a format that supports BPTT training by creating sequences of specified lengths. BPTT utilizes these sequences to train the RNN, allowing it to learn temporal dependencies within the data.
def get_batch(source, i):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target
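# A hypothetical worked illustration of get_batch (not part of the original script): with
# args.bptt = 2 and a batchified source of 6 timesteps and batch size 1,
#
#     source = torch.arange(6).unsqueeze(1)     # shape (6, 1): [[0], [1], [2], [3], [4], [5]]
#     get_batch(source, 0)  ->  data = [[0], [1]],  target = [1, 2]
#     get_batch(source, 2)  ->  data = [[2], [3]],  target = [3, 4]
#
# Each call returns a bptt-long chunk along dimension 0 (seq_len) as the input, and the same
# chunk shifted by one timestep, flattened, as the next-token targets.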

# model.eval(): Switches to evaluation mode, affecting dropout/batch normalization.
# hidden = model.init_hidden(eval_batch_size): Initializes hidden state for non-Transformer models.
# with torch.no_grad(): Disables gradient computation to save memory during evaluation.
# for i in range(..., args.bptt): Iterates over data in chunks, stepping by bptt (backpropagation through time length).
# data, targets = get_batch(data_source, i): Retrieves a batch and its corresponding targets.
# if args.model == 'Transformer': Checks if the model is a Transformer to handle evaluation accordingly.
# output = model(data): Gets the model's output for the current data batch.
# output = output.view(-1, ntokens): Reshapes Transformer output to match expected dimensions.
# output, hidden = model(data, hidden): For RNNs, gets output and updates hidden state.
# hidden = repackage_hidden(hidden): Detaches hidden state from the graph to prevent memory buildup.
# total_loss += len(data) * criterion(output, targets).item(): Adds scaled loss to total loss.
# return total_loss / (len(data_source) - 1): Calculates and returns the average loss per token position.
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
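    # The middle of evaluate() is elided between the diff hunks. Based on the notes above and
    # the upstream word_language_model example, the elided loop looks roughly like this
    # (a sketch, not the exact lines of this diff):
    #
    #     total_loss = 0.
    #     ntokens = len(corpus.dictionary)
    #     if args.model != 'Transformer':
    #         hidden = model.init_hidden(eval_batch_size)
    #     with torch.no_grad():
    #         for i in range(0, data_source.size(0) - 1, args.bptt):
    #             data, targets = get_batch(data_source, i)
    #             if args.model == 'Transformer':
    #                 output = model(data)
    #                 output = output.view(-1, ntokens)
    #             else:
    #                 output, hidden = model(data, hidden)
    #                 hidden = repackage_hidden(hidden)
    #             total_loss += len(data) * criterion(output, targets).item()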
@@ -164,6 +203,18 @@ def evaluate(data_source):
    return total_loss / (len(data_source) - 1)

# model.train(): Switches to training mode, enabling dropout.
# hidden = model.init_hidden(args.batch_size): Initializes the hidden state (sized for the training batch size) for non-Transformer models.
# for batch, i in enumerate(..., args.bptt): Iterates through the dataset in chunks defined by bptt.
# model.zero_grad(): Clears old gradients; necessary before a new backward pass.
# if args.model == 'Transformer': Adjusts processing for the Transformer model.
# output, hidden = model(data, hidden): Gets output and updates hidden state for RNNs.
# loss = criterion(output, targets): Calculates loss between model output and actual targets.
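# The train() function itself is cut off from this excerpt. Based on the notes above and the
# upstream word_language_model example, its core loop looks roughly like this (a sketch under
# those assumptions, not the exact lines of this diff; lr and args.clip come from the script's
# argument parsing):
#
#     def train():
#         model.train()                                   # enable dropout
#         total_loss = 0.
#         ntokens = len(corpus.dictionary)
#         if args.model != 'Transformer':
#             hidden = model.init_hidden(args.batch_size)
#         for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
#             data, targets = get_batch(train_data, i)
#             model.zero_grad()                           # clear gradients from the previous step
#             if args.model == 'Transformer':
#                 output = model(data)
#                 output = output.view(-1, ntokens)
#             else:
#                 hidden = repackage_hidden(hidden)       # detach hidden state from the old graph
#                 output, hidden = model(data, hidden)
#             loss = criterion(output, targets)
#             loss.backward()
#             torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
#             for p in model.parameters():
#                 p.data.add_(p.grad, alpha=-lr)          # simple SGD update used by the example
#             total_loss += loss.item()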