
Commit f596f5e

Add /command interface to sidekick with tools for adding/removing files
In `aicodebot/helpers.py`, a new class `SidekickCompleter` has been added to provide command completion functionality in the sidekick feature. In `aicodebot/learn.py`, the error messages have been made more informative and user-friendly. The `requirements.in` and `requirements.txt` files have been updated with the addition of the `humanize` library.
1 parent 28e056e commit f596f5e

File tree

  aicodebot/cli.py
  aicodebot/helpers.py
  aicodebot/learn.py
  aicodebot/prompts.py
  requirements/requirements.in
  requirements/requirements.txt

6 files changed: +122 −42 lines
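For readers unfamiliar with prompt_toolkit completers, the pattern this commit adopts looks roughly like the sketch below: a Completer subclass yields Completion objects for the slash commands, and the prompt call wires it in alongside the history file. This is an illustration of the library API only (SlashCommandCompleter and the history path are made up for the example), not the commit's exact code — see the diffs below for that.

    from prompt_toolkit import prompt
    from prompt_toolkit.completion import Completer, Completion
    from prompt_toolkit.history import FileHistory

    COMMANDS = ["/add", "/drop", "/edit", "/files", "/quit"]

    class SlashCommandCompleter(Completer):
        """Suggest slash commands that match the text typed so far."""

        def get_completions(self, document, complete_event):
            text = document.text_before_cursor
            if text.startswith("/"):
                for command in COMMANDS:
                    if command.startswith(text):
                        # start_position rewinds over what was already typed
                        yield Completion(command, start_position=-len(text))

    # Pressing Tab after typing "/a" offers "/add"
    user_input = prompt("🤖 ➤ ", history=FileHistory(".history"), completer=SlashCommandCompleter())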


aicodebot/cli.py

Lines changed: 71 additions & 19 deletions
@@ -1,9 +1,16 @@
 from aicodebot import version as aicodebot_version
 from aicodebot.coder import CREATIVE_TEMPERATURE, DEFAULT_MAX_TOKENS, Coder
 from aicodebot.config import get_config_file, get_local_data_dir, read_config
-from aicodebot.helpers import RichLiveCallbackHandler, create_and_write_file, exec_and_get_output, logger
+from aicodebot.helpers import (
+    RichLiveCallbackHandler,
+    SidekickCompleter,
+    create_and_write_file,
+    exec_and_get_output,
+    logger,
+)
 from aicodebot.learn import load_documents_from_repo, store_documents
 from aicodebot.prompts import DEFAULT_PERSONALITY, PERSONALITIES, generate_files_context, get_prompt
+from datetime import datetime
 from langchain.chains import LLMChain
 from langchain.memory import ConversationTokenBufferMemory
 from openai.api_resources import engine
@@ -14,7 +21,7 @@
 from rich.live import Live
 from rich.markdown import Markdown
 from rich.style import Style
-import click, datetime, json, langchain, openai, os, random, shutil, subprocess, sys, tempfile, webbrowser, yaml
+import click, humanize, json, langchain, openai, os, random, shutil, subprocess, sys, tempfile, webbrowser, yaml

 # ----------------------------- Default settings ----------------------------- #

@@ -159,7 +166,9 @@ def commit(verbose, response_token_size, yes, skip_pre_commit, files):  # noqa:
     chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
     response = chain.run(diff_context)

-    commit_message_approved = click.confirm("Do you want to use this commit message (type n to edit)?", default=True)
+    commit_message_approved = click.confirm(
+        "Do you want to use this commit message (type n to edit)?", default=True
+    )

     # Write the commit message to a temporary file
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp:
@@ -349,7 +358,7 @@ def fun_fact(verbose, response_token_size):
     # Set up the chain
     chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)

-    year = random.randint(1942, datetime.datetime.utcnow().year)
+    year = random.randint(1942, datetime.utcnow().year)
     chain.run(f"programming and artificial intelligence in the year {year}")


@@ -368,7 +377,7 @@ def learn(repo_url, verbose):

     owner, repo_name = Coder.parse_github_url(repo_url)

-    start_time = datetime.datetime.utcnow()
+    start_time = datetime.utcnow()

     local_data_dir = get_local_data_dir()

@@ -382,7 +391,7 @@ def learn(repo_url, verbose):

     with console.status("Storing the repo in the vector store", spinner=DEFAULT_SPINNER):
         store_documents(documents, vector_store_dir)
-    console.print(f"✅ Repo loaded and indexed in {datetime.datetime.utcnow() - start_time} seconds.")
+    console.print(f"✅ Repo loaded and indexed in {datetime.utcnow() - start_time} seconds.")


 @cli.command
@@ -433,7 +442,9 @@ def review(commit, verbose, output_format, response_token_size, files):

     else:
         # Stream live
-        console.print("Examining the diff and generating the review for the following files:\n\t" + "\n\t".join(files))
+        console.print(
+            "Examining the diff and generating the review for the following files:\n\t" + "\n\t".join(files)
+        )
         with Live(Markdown(""), auto_refresh=True) as live:
             llm.streaming = True
             llm.callbacks = [RichLiveCallbackHandler(live, bot_style)]
@@ -446,7 +457,7 @@ def review(commit, verbose, output_format, response_token_size, files):
 @click.option("-v", "--verbose", count=True)
 @click.option("-t", "--response-token-size", type=int, default=DEFAULT_MAX_TOKENS * 3)
 @click.argument("files", nargs=-1)
-def sidekick(request, verbose, response_token_size, files):
+def sidekick(request, verbose, response_token_size, files):  # noqa: PLR0915
     """
     EXPERIMENTAL: Coding help from your AI sidekick\n
     FILES: List of files to be used as context for the session
@@ -462,6 +473,16 @@ def sidekick(request, verbose, response_token_size, files):
     # git history
     context = generate_files_context(files)

+    def show_file_context(files):
+        console.print("Files loaded in this session:")
+        for file in files:
+            token_length = Coder.get_token_length(Path(file).read_text())
+            console.print(f"\t{file} ({humanize.intcomma(token_length)} tokens)")
+
+    if files:
+        files = set(files)  # Dedupe
+        show_file_context(files)
+
     # Generate the prompt and set up the model
     prompt = get_prompt("sidekick")
     memory_token_size = response_token_size * 2  # Allow decent history
@@ -474,33 +495,64 @@ def sidekick(request, verbose, response_token_size, files):

     llm = Coder.get_llm(model_name, verbose, response_token_size, streaming=True)

-    # Open the temporary file in the user's editor
-    editor = Path(os.getenv("EDITOR", "/usr/bin/vim")).name
-
     # Set up the chain
     memory = ConversationTokenBufferMemory(
         memory_key="chat_history", input_key="task", llm=llm, max_token_limit=memory_token_size
     )
     chain = LLMChain(llm=llm, prompt=prompt, memory=memory, verbose=verbose)
     history_file = Path.home() / ".aicodebot_request_history"

-    console.print(f"Enter a request OR (q) quit, OR (e) to edit using {editor}")
+    console.print(
+        "Enter a request for your AICodeBot sidekick. Type / to see available commands.\n", style=bot_style
+    )
     while True:  # continuous loop for multiple questions
         edited_input = None
         if request:
             human_input = request
         else:
-            human_input = input_prompt("🤖 ➤ ", history=FileHistory(history_file)).strip()
+            human_input = input_prompt("🤖 ➤ ", history=FileHistory(history_file), completer=SidekickCompleter())
+            human_input = human_input.strip()
         if not human_input:
             # Must have been spaces or blank line
             continue
-        elif len(human_input) == 1:
-            if human_input.lower() == "q":
-                break
-            elif human_input.lower() == "e":
+
+        if human_input.startswith("/"):
+            cmd = human_input.lower().split()[0]
+            # Handle commands
+            if cmd in ["/add", "/drop"]:
+                # Get the filename
+                # If they didn't specify a file, then ignore
+                try:
+                    filename = human_input.split()[1]
+                except IndexError:
+                    continue
+
+                # If the file doesn't exist, or we can't open it, let them know
+                if not Path(filename).exists():
+                    console.print(f"File '{filename}' doesn't exist.", style=error_style)
+                    continue
+
+                if cmd == "/add":
+                    files.add(filename)
+                    console.print(f"✅ Added '{filename}' to the list of files.")
+                elif cmd == "/drop":
+                    # Drop the file from the list
+                    files.discard(filename)
+                    console.print(f"✅ Dropped '{filename}' from the list of files.")
+
+                context = generate_files_context(files)
+                show_file_context(files)
+                continue
+            elif cmd == "/edit":
                 human_input = edited_input = click.edit()
+            elif cmd == "/files":
+                show_file_context(files)
+                continue
+            elif cmd == "/quit":
+                break
+
         elif human_input.lower()[-2:] == r"\e":
-            # If the text ends with \e then we want to edit it
+            # If the text ends wit then we want to edit it
             human_input = edited_input = click.edit(human_input[:-2])

         if edited_input:
@@ -535,5 +587,5 @@ def setup_config():
     return existing_config


-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     cli()
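One small building block above that may be unfamiliar is humanize.intcomma, which the new show_file_context helper uses to print per-file token counts with thousands separators. A minimal sketch of the idea; the token counter is stubbed out here because Coder.get_token_length is project-internal:

    from pathlib import Path

    import humanize

    def report_context_size(files, count_tokens):
        # Mirrors show_file_context: one line per file with a readable token count
        for name in sorted(files):
            tokens = count_tokens(Path(name).read_text())
            print(f"\t{name} ({humanize.intcomma(tokens)} tokens)")

    # humanize.intcomma(1234567) == "1,234,567"; the lambda is a crude stand-in tokenizer
    report_context_size({"aicodebot/cli.py"}, count_tokens=lambda text: len(text.split()))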

aicodebot/helpers.py

Lines changed: 24 additions & 0 deletions
@@ -1,6 +1,7 @@
 from langchain.callbacks.base import BaseCallbackHandler
 from loguru import logger
 from pathlib import Path
+from prompt_toolkit.completion import Completer, Completion
 from rich.markdown import Markdown
 import os, subprocess, sys

@@ -31,6 +32,29 @@ def create_and_write_file(filename, text, overwrite=False):
         f.write(text)


+class SidekickCompleter(Completer):
+    """A custom prompt_toolkit completer for sidekick."""
+
+    def get_completions(self, document, complete_event):
+        # Get the text before the cursor
+        text = document.text_before_cursor
+
+        supported_commands = ["/add", "/drop", "/edit", "/files", "/quit"]
+
+        # If the text starts with a slash, it's a command
+        if text.startswith("/"):
+            for command in supported_commands:
+                if command.startswith(text):
+                    yield Completion(command, start_position=-len(text))
+
+        if text.startswith(("/add ", "/drop ")):
+            # If the text starts with /add or /drop, it's a file
+            files = Path().rglob("*")
+            for file in files:
+                if file.name.startswith(text.split()[-1]):
+                    yield Completion(file.name, start_position=-len(text.split()[-1]))
+
+
 def exec_and_get_output(command):
     """Execute a command and return its output as a string."""
     logger.debug(f"Executing command: {' '.join(command)}")

aicodebot/learn.py

Lines changed: 6 additions & 2 deletions
@@ -99,7 +99,9 @@ def store_documents(documents, vector_store_dir):
                 f"Processing {document.metadata['file_path']} as {language_extension_map[file_type].value} code"
             )
             splitter = RecursiveCharacterTextSplitter.from_language(
-                language=language_extension_map[document.metadata["file_type"].lower()], chunk_size=50, chunk_overlap=0
+                language=language_extension_map[document.metadata["file_type"].lower()],
+                chunk_size=50,
+                chunk_overlap=0,
             )
         else:
             # TODO: Check if it's a text file
@@ -119,7 +121,9 @@ def load_learned_repo(repo_name):
     """Load a vector store from a learned repo."""
     vector_store_file = Path(get_local_data_dir() / "vector_stores" / repo_name / "faiss_index")
     if not vector_store_file.exists():
-        raise ValueError(f"Vector store for {repo_name} does not exist. Please run `aicodebot learn $githuburl` first.")
+        raise ValueError(
+            f"Vector store for {repo_name} does not exist. Please run `aicodebot learn $githuburl` first."
+        )

     embeddings = OpenAIEmbeddings()
     return FAISS.load_local(vector_store_file, embeddings)
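The splitter call that was reflowed above is LangChain's language-aware text splitter. For reference, a standalone sketch with the same chunk settings; Language.PYTHON stands in for whatever language_extension_map resolves to for a given file:

    from langchain.text_splitter import Language, RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter.from_language(
        language=Language.PYTHON,
        chunk_size=50,
        chunk_overlap=0,
    )
    # Each chunk is at most ~50 characters, split along Python syntax boundaries where possible
    chunks = splitter.create_documents(["def add(a, b):\n    return a + b\n"])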

aicodebot/prompts.py

Lines changed: 6 additions & 7 deletions
@@ -87,7 +87,9 @@
     ),
     "Her": SimpleNamespace(name="Her", prompt=HER, description="The AI character from the movie Her"),
     "Jules": SimpleNamespace(
-        name="Jules", prompt=JULES, description="Samuel L. Jackson's character from Pulp Fiction (warning: profanity))"
+        name="Jules",
+        prompt=JULES,
+        description="Samuel L. Jackson's character from Pulp Fiction (warning: profanity))",
     ),
     "Michael": SimpleNamespace(
         name="Michael", prompt=MICHAEL, description="Michael Scott from The Office (warning: TWSS))"
@@ -183,11 +185,6 @@ def generate_files_context(files):
     files_context += "Here are the relevant files we are working with in this session:\n"
     for file_name in files:
         contents = Path(file_name).read_text()
-        token_length = Coder.get_token_length(contents)
-        if token_length > 2_000:
-            logger.warning(f"File {file_name} is large, using {token_length} tokens")
-        else:
-            logger.debug(f"File {file_name} is {token_length} tokens")
         files_context += f"--- START OF FILE: {file_name} ---\n"
         files_context += contents
         files_context += f"\n--- END OF FILE: {file_name} ---\n\n"
@@ -341,7 +338,9 @@ def get_prompt(command, structured_output=False):
         "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context"]),
         "debug": PromptTemplate(template=DEBUG_TEMPLATE, input_variables=["command_output"]),
         "fun_fact": PromptTemplate(template=FUN_FACT_TEMPLATE, input_variables=["topic"]),
-        "sidekick": PromptTemplate(template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context"]),
+        "sidekick": PromptTemplate(
+            template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context"]
+        ),
     }

     try:
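The "sidekick" entry reformatted above is a plain LangChain PromptTemplate keyed on three variables. A minimal sketch of how such a template is filled at runtime; the template string here is illustrative, not the real SIDEKICK_TEMPLATE:

    from langchain.prompts import PromptTemplate

    template = "Chat so far:\n{chat_history}\n\nContext:\n{context}\n\nTask: {task}"
    sidekick_prompt = PromptTemplate(template=template, input_variables=["chat_history", "task", "context"])
    print(sidekick_prompt.format(chat_history="(empty)", context="(no files loaded)", task="Explain this repo"))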

requirements/requirements.in

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ beautifulsoup4 # needed by langchain
 click # command line interface helpers
 faiss-cpu
 GitPython
+humanize
 langchain
 loguru
 openai

requirements/requirements.txt

Lines changed: 14 additions & 14 deletions
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile
 #
 aiohttp==3.8.4
     # via
@@ -15,39 +15,39 @@ async-timeout==4.0.2
 attrs==23.1.0
     # via aiohttp
 beautifulsoup4==4.12.2
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 certifi==2023.5.7
     # via requests
 charset-normalizer==3.1.0
     # via
     #   aiohttp
     #   requests
 click==8.1.6
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 dataclasses-json==0.5.8
     # via langchain
 faiss-cpu==1.7.4
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 frozenlist==1.3.3
     # via
     #   aiohttp
     #   aiosignal
 gitdb==4.0.10
     # via gitpython
 gitpython==3.1.32
-    # via -r requirements/requirements.in
-greenlet==2.0.2
-    # via sqlalchemy
+    # via -r requirements.in
+humanize==4.7.0
+    # via -r requirements.in
 idna==3.4
     # via
     #   requests
     #   yarl
 langchain==0.0.238
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 langsmith==0.0.11
     # via langchain
 loguru==0.7.0
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 markdown-it-py==3.0.0
     # via rich
 marshmallow==3.19.0
@@ -71,13 +71,13 @@ numpy==1.25.0
     #   langchain
     #   numexpr
 openai==0.27.8
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 openapi-schema-pydantic==1.2.4
     # via langchain
 packaging==23.1
     # via marshmallow
 prompt-toolkit==3.0.39
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 pydantic==1.10.9
     # via
     #   langchain
@@ -87,7 +87,7 @@ pygments==2.15.1
     # via rich
 pyyaml==6.0.1
     # via
-    #   -r requirements/requirements.in
+    #   -r requirements.in
     #   langchain
 regex==2023.6.3
     # via tiktoken
@@ -98,7 +98,7 @@ requests==2.31.0
     #   openai
     #   tiktoken
 rich==13.4.2
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 smmap==5.0.0
     # via gitdb
 soupsieve==2.4.1
@@ -108,7 +108,7 @@ sqlalchemy==2.0.16
 tenacity==8.2.2
     # via langchain
 tiktoken==0.4.0
-    # via -r requirements/requirements.in
+    # via -r requirements.in
 tqdm==4.65.0
     # via openai
 typing-extensions==4.6.3
