chore: lint splitters (black -l 88, drop unused imports, set score_threshold per splitter)

simjak · simjak · commit ff2ed5f94090 · 2024-02-23T11:10:19.000+02:00
diff --git a/Makefile b/Makefile
@@ -1,13 +1,13 @@
 format:
-	poetry run black --target-version py39 .
+	poetry run black --target-version py39 -l 88 .
 	poetry run ruff --select I --fix .
 
 PYTHON_FILES=.
 lint: PYTHON_FILES=.
 lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$')
 
 lint lint_diff:
-	poetry run black --target-version py39 $(PYTHON_FILES) --check
+	poetry run black --target-version py39 -l 88 $(PYTHON_FILES) --check
 	poetry run ruff .
 	poetry run mypy $(PYTHON_FILES)
 
diff --git a/semantic_router/splitters/base.py b/semantic_router/splitters/base.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import List
 
 from colorama import Fore, Style
 from pydantic.v1 import BaseModel, Extra
@@ -10,7 +10,6 @@
 class BaseSplitter(BaseModel):
     name: str
     encoder: BaseEncoder
-    score_threshold: Optional[float]
 
     class Config:
         extra = Extra.allow
diff --git a/semantic_router/splitters/consecutive_sim.py b/semantic_router/splitters/consecutive_sim.py
@@ -19,8 +19,9 @@ def __init__(
         name: str = "consecutive_similarity_splitter",
         score_threshold: float = 0.45,
     ):
-        super().__init__(name=name, score_threshold=score_threshold, encoder=encoder)
+        super().__init__(name=name, encoder=encoder)
         encoder.score_threshold = score_threshold
+        self.score_threshold = score_threshold
 
     def __call__(self, docs: List[Any]):
         # Check if there's only a single document
diff --git a/semantic_router/splitters/cumulative_sim.py b/semantic_router/splitters/cumulative_sim.py
@@ -8,9 +8,9 @@
 
 
 class CumulativeSimSplitter(BaseSplitter):
-
     """
-    Called "cumulative sim" because we check the similarities of the embeddings of cumulative concatenated documents with the next document.
+    Called "cumulative sim" because we check the similarities of the
+    embeddings of cumulative concatenated documents with the next document.
     """
 
     def __init__(
@@ -19,26 +19,30 @@ def __init__(
         name: str = "cumulative_similarity_splitter",
         score_threshold: float = 0.45,
     ):
-        super().__init__(name=name, score_threshold=score_threshold, encoder=encoder)
+        super().__init__(name=name, encoder=encoder)
         encoder.score_threshold = score_threshold
+        self.score_threshold = score_threshold
 
     def __call__(self, docs: List[str]):
         total_docs = len(docs)
         # Check if there's only a single document
         if total_docs == 1:
             raise ValueError(
-                "There is only one document provided; at least two are required to determine topics based on similarity."
+                "There is only one document provided; at least two are required "
+                "to determine topics based on similarity."
             )
         splits = []
         curr_split_start_idx = 0
 
         for idx in range(0, total_docs):
             if idx + 1 < total_docs:  # Ensure there is a next document to compare with.
                 if idx == 0:
-                    # On the first iteration, compare the first document directly to the second.
+                    # On the first iteration, compare the
+                    # first document directly to the second.
                     curr_split_docs = docs[idx]
                 else:
-                    # For subsequent iterations, compare cumulative documents up to the current one with the next.
+                    # For subsequent iterations, compare cumulative
+                    # documents up to the current one with the next.
                     curr_split_docs = "\n".join(docs[curr_split_start_idx : idx + 1])
                 next_doc = docs[idx + 1]
 
diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py
@@ -9,7 +9,6 @@
 from semantic_router.utils.logger import logger
 
 
-
 class RollingWindowSplitter(BaseSplitter):
     def __init__(
         self,
@@ -20,7 +19,7 @@ def __init__(
         max_split_tokens=300,
         split_tokens_tolerance=10,
         plot_splits=False,
-        name = "rolling_window_splitter",
+        name="rolling_window_splitter",
     ):
         super().__init__(name=name, encoder=encoder)
         self.calculated_threshold: float
diff --git a/semantic_router/splitters/utils.py b/semantic_router/splitters/utils.py
@@ -1,10 +1,5 @@
-from typing import List
-
 import regex
 import tiktoken
-from colorama import Fore, Style
-
-from semantic_router.schema import DocumentSplit
 
 
 def split_to_sentences(text: str) -> list[str]:
@@ -66,4 +61,3 @@ def tiktoken_length(text: str) -> int:
     tokenizer = tiktoken.get_encoding("cl100k_base")
     tokens = tokenizer.encode(text, disallowed_special=())
     return len(tokens)
-