@@ -75,6 +75,7 @@ def __init__(
         dropout: float = 0.0,
         word_dropout: float = 0.05,
         locked_dropout: float = 0.5,
+        reproject_to: int = None,
         train_initial_hidden_state: bool = False,
         rnn_type: str = "LSTM",
         pickle_module: str = "pickle",
@@ -92,6 +93,7 @@ def __init__(
         :param rnn_layers: number of RNN layers
         :param dropout: dropout probability
         :param word_dropout: word dropout probability
+        :param reproject_to: set this to control the dimensionality of the reprojection layer
         :param locked_dropout: locked dropout probability
         :param train_initial_hidden_state: if True, trains initial hidden state of RNN
         :param beta: Parameter for F-beta score for evaluation and training annealing
@@ -154,12 +156,16 @@ def __init__(
         if locked_dropout > 0.0:
             self.locked_dropout = flair.nn.LockedDropout(locked_dropout)
 
-        rnn_input_dim: int = self.embeddings.embedding_length
+        embedding_dim: int = self.embeddings.embedding_length
 
-        self.relearn_embeddings: bool = True
+        # if no dimensionality for reprojection layer is set, reproject to equal dimension
+        self.reproject_to = reproject_to
+        if self.reproject_to is None: self.reproject_to = embedding_dim
+        rnn_input_dim: int = self.reproject_to
 
+        self.relearn_embeddings: bool = True
         if self.relearn_embeddings:
-            self.embedding2nn = torch.nn.Linear(rnn_input_dim, rnn_input_dim)
+            self.embedding2nn = torch.nn.Linear(embedding_dim, rnn_input_dim)
 
         self.train_initial_hidden_state = train_initial_hidden_state
         self.bidirectional = True
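
The effect of this hunk: embeddings of size embedding_length now pass through the embedding2nn linear layer into a reproject_to-dimensional space (defaulting to the embedding dimension, which preserves the old behavior), and the RNN consumes the reprojected input. A minimal usage sketch of the new parameter; the toy tag dictionary and GloVe embeddings below are illustrative assumptions, not part of this diff:

# toy tag dictionary; a real one would come from corpus.make_tag_dictionary("ner")
from flair.data import Dictionary
from flair.embeddings import WordEmbeddings
from flair.models import SequenceTagger

tag_dictionary = Dictionary(add_unk=False)
for tag in ["O", "B-PER", "I-PER", "<START>", "<STOP>"]:
    tag_dictionary.add_item(tag)

embeddings = WordEmbeddings("glove")  # 100-dimensional GloVe vectors

tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type="ner",
    reproject_to=64,  # embedding2nn becomes Linear(100, 64); the RNN sees 64-dim inputs
)

Setting reproject_to below the embedding dimension is the typical use: it shrinks large stacked embeddings before the RNN instead of reprojecting to equal size.
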
@@ -237,6 +243,7 @@ def _get_state_dict(self):
             "rnn_type": self.rnn_type,
             "beta": self.beta,
             "weight_dict": self.weight_dict,
+            "reproject_to": self.reproject_to,
         }
         return model_state
 
@@ -260,6 +267,7 @@ def _init_model_with_state_dict(state):
         )
         beta = 1.0 if "beta" not in state.keys() else state["beta"]
         weights = None if "weight_dict" not in state.keys() else state["weight_dict"]
+        reproject_to = None if "reproject_to" not in state.keys() else state["reproject_to"]
 
         model = SequenceTagger(
             hidden_size=state["hidden_size"],
@@ -276,6 +284,7 @@ def _init_model_with_state_dict(state):
             rnn_type=rnn_type,
             beta=beta,
             loss_weights=weights,
+            reproject_to=reproject_to,
         )
         model.load_state_dict(state["state_dict"])
         return model
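
Because reproject_to is written into the state dict and read back with a None fallback, checkpoints saved before this change still load and simply reproject to the embedding dimension. Continuing the sketch above, a round-trip check; the file name is illustrative:

from flair.models import SequenceTagger

tagger.save("tagger-with-reprojection.pt")  # hypothetical path
loaded = SequenceTagger.load("tagger-with-reprojection.pt")
assert loaded.reproject_to == tagger.reproject_to == 64
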
@@ -1006,7 +1015,7 @@ def _fetch_model(model_name) -> str:
             [hu_path, "release-de-pos-0", "de-pos-ud-hdt-v0.5.pt"]
         )
 
-        model_map["de-pos-fine-grained"] = "/".join(
+        model_map["de-pos-tweets"] = "/".join(
             [
                 aws_resource_path_v04,
                 "POS-fine-grained-german-tweets",
@@ -1028,8 +1037,8 @@ def _fetch_model(model_name) -> str:
         model_map["nl-ner"] = "/".join(
             [aws_resource_path_v04, "NER-conll2002-dutch", "nl-ner-conll02-v0.1.pt"]
         )
-        model_map["ml-pos"] = "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-upos-model.pt"
-        model_map["ml-xpos"] = "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-xpos-model.pt"
+        model_map["ml-pos"] = "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-xpos-model.pt"
+        model_map["ml-upos"] = "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-upos-model.pt"
 
         cache_dir = Path("models")
         if model_name in model_map:
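
With the corrected map entries, the pretrained taggers resolve under their intended names: "de-pos-tweets" replaces the old "de-pos-fine-grained" key, "ml-pos" now fetches the Malayalam XPOS model, and "ml-upos" the UPOS model. A short usage sketch; the model is downloaded on first call:

from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load("de-pos-tweets")  # formerly "de-pos-fine-grained"

sentence = Sentence("Das ist ein Test")
tagger.predict(sentence)
print(sentence.to_tagged_string())
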