Enhance language identification and context generation 🧠

TechNickAI · TechNickAI · commit cb6844fc1331 · 2023-07-23T11:53:00.000-07:00
Incorporated language identification in the commit, review, and sidekick functions of cli.py (debug passes a fixed unix/bash/shell language list) and updated the expert software engineer prompt in prompts.py to name the identified languages instead of hard-coding Python. This change allows for more context-aware, language-specific code analysis and commit message generation. 🌐
diff --git a/aicodebot/cli.py b/aicodebot/cli.py
@@ -120,6 +120,7 @@ def commit(verbose, response_token_size, yes, skip_pre_commit, files):  # noqa:
         files = staged_files
 
     diff_context = Coder.git_diff_context()
+    languages = ",".join(Coder.identify_languages(files))
     if not diff_context:
         console.print("No changes to commit. 🤷")
         return
@@ -166,7 +167,7 @@ def commit(verbose, response_token_size, yes, skip_pre_commit, files):  # noqa:
 
         # Set up the chain
         chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
-        response = chain.run(diff_context)
+        response = chain.run({"diff_context": diff_context, "languages": languages})
 
     commit_message_approved = click.confirm(
         "Do you want to use this commit message (type n to edit)?", default=True
@@ -333,7 +334,7 @@ def debug(command, verbose):
 
         # Set up the chain
         chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
-        chain.run(error_output)
+        chain.run({"error_output": error_output, "languages": ["unix", "bash", "shell"]})
 
     sys.exit(process.returncode)
 
@@ -421,6 +422,7 @@ def review(commit, verbose, output_format, response_token_size, files):
     if not diff_context:
         console.print("No changes detected for review. 🤷")
         return
+    languages = ",".join(Coder.identify_languages(files))
 
     # Load the prompt
     prompt = get_prompt("review", structured_output=output_format == "json")
@@ -437,7 +439,7 @@ def review(commit, verbose, output_format, response_token_size, files):
 
     if output_format == "json":
         with console.status("Examining the diff and generating the review", spinner=DEFAULT_SPINNER):
-            response = chain.run(diff_context)
+            response = chain.run({"diff_context": diff_context, "languages": languages})
 
         parsed_response = prompt.output_parser.parse(response)
         data = {
@@ -458,7 +460,7 @@ def review(commit, verbose, output_format, response_token_size, files):
             llm.streaming = True
             llm.callbacks = [RichLiveCallbackHandler(live, bot_style)]
 
-            chain.run(diff_context)
+            chain.run({"diff_context": diff_context, "languages": languages})
 
 
 @cli.command
@@ -481,6 +483,7 @@ def sidekick(request, verbose, response_token_size, files):  # noqa: PLR0915
     # Style guides/reference code
     # git history
     context = generate_files_context(files)
+    languages = ",".join(Coder.identify_languages(files))
 
     def show_file_context(files):
         console.print("Files loaded in this session:")
@@ -552,6 +555,7 @@ def show_file_context(files):
                     console.print(f"✅ Dropped '{filename}' from the list of files.")
 
                 context = generate_files_context(files)
+                languages = ",".join(Coder.identify_languages(files))
                 show_file_context(files)
                 continue
 
@@ -577,7 +581,7 @@ def show_file_context(files):
             callback = RichLiveCallbackHandler(live, bot_style)
             llm.callbacks = [callback]  # a fresh callback handler for each question
 
-            chain.run({"task": human_input, "context": context})
+            chain.run({"task": human_input, "context": context, "languages": languages})
 
         if request:
             # If we were given a request, then we only want to run once
diff --git a/aicodebot/prompts.py b/aicodebot/prompts.py
@@ -188,9 +188,8 @@ def get_personality_prompt():
 
 EXPERT_SOFTWARE_ENGINEER = """
 You are an expert software engineer, versed in many programming languages,
-especially Python. You follow software development best practices and you know how to
+especially {languages}. You follow software development best practices and you know how to
 write clean, maintainable code. You are a champion for code quality.
-You are terse and to the point.
 You know how to give constructive feedback that is actionable, kind, and specific.
 """
 
@@ -276,14 +275,15 @@ def generate_files_context(files):
     END DIFF
 
     Instructions for the commit message:
-    * Start with a short summary (<72 characters).
+    * Start with a short summary (less than 72 characters).
     * Follow with a blank line and detailed text, but only if necessary. If the summary is sufficient,
         then omit the detailed text.
     * Use imperative mood (e.g., "Add feature").
     * Be in GitHub-flavored markdown format.
-    * Include contextually appropriate emojis (optional), but don't over do it.
     * Have a length that scales with the length of the diff context. If the DIFF is a small change,
       respond quickly with a terse message so we can go faster.
+    * Do not repeat information that is already known from the git commit.
+    * Be terse.
 
     BEGIN SAMPLE COMMIT MESSAGE
     Update README with better instructions for installation
@@ -293,8 +293,9 @@ def generate_files_context(files):
     new users get started faster.
     END SAMPLE COMMIT MESSAGE
 
+    Formatting instructions:
     Start your response with the commit message. No prefix or introduction.
-    Your entire response will be the commit message.
+    Your entire response will be the commit message. No quotation marks.
 """
 )
 
@@ -355,7 +356,7 @@ def generate_files_context(files):
     * "COMMENTS" - there were some issues found, but they should not block the build and are informational only
     * "FAILED" - there were serious, blocking issues found that should be fixed before merging the code
 
-    The review message should be a markdown-formatted string for display with rich.Markdown or GitHub markdown.
+    The review message should be a markdown-formatted string for display with GitHub markdown.
 """
 )
 
@@ -368,24 +369,24 @@ def get_prompt(command, structured_output=False):
             parser = PydanticOutputParser(pydantic_object=ReviewResult)
             return PromptTemplate(
                 template=REVIEW_TEMPLATE + "\n{format_instructions}",
-                input_variables=["diff_context"],
+                input_variables=["diff_context", "languages"],
                 partial_variables={"format_instructions": parser.get_format_instructions()},
                 output_parser=parser,
             )
         else:
             return PromptTemplate(
                 template=REVIEW_TEMPLATE + "\nRespond in markdown format",
-                input_variables=["diff_context"],
+                input_variables=["diff_context", "languages"],
             )
 
     else:
         prompt_map = {
             "alignment": PromptTemplate(template=ALIGNMENT_TEMPLATE, input_variables=[]),
-            "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context"]),
-            "debug": PromptTemplate(template=DEBUG_TEMPLATE, input_variables=["command_output"]),
+            "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context", "languages"]),
+            "debug": PromptTemplate(template=DEBUG_TEMPLATE, input_variables=["command_output", "languages"]),
             "fun_fact": PromptTemplate(template=FUN_FACT_TEMPLATE, input_variables=["topic"]),
             "sidekick": PromptTemplate(
-                template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context"]
+                template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context", "languages"]
             ),
         }