
Commit 169abdd

Updated 5_minutes_RAG_no_GPU (NVIDIA#239)
* updated requirements.txt for 5-min-rag-no-gpu
* add style.css file for 5-min-rag-no-gpu
* add Streamlit config folder for 5-min-rag-no-gpu
* updated README.md for 5-min-rag-no-gpu
* updated UI and certain deprecated models and functions in main.py for 5-min-rag-no-gpu
1 parent 2cfd448 commit 169abdd

File tree

5 files changed: +197 -47 lines changed

community/5_mins_rag_no_gpu/.streamlit/config.toml
community/5_mins_rag_no_gpu/README.md
community/5_mins_rag_no_gpu/main.py
community/5_mins_rag_no_gpu/requirements.txt
community/5_mins_rag_no_gpu/style.css
community/5_mins_rag_no_gpu/.streamlit/config.toml
Lines changed: 9 additions & 0 deletions

```diff
@@ -0,0 +1,9 @@
+[client]
+showErrorDetails = false
+
+[theme]
+primaryColor = "#76b900"
+backgroundColor = "white"
+
+[browser]
+gatherUsageStats = false
```
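Streamlit picks up `.streamlit/config.toml` automatically from the app's working directory, so `main.py` needs no extra wiring. As a quick sanity check (a hypothetical snippet, not part of this commit), the merged settings can be read back with `st.get_option`:

```python
import streamlit as st

# Streamlit merges .streamlit/config.toml into its runtime configuration;
# reading the options back confirms the file was picked up.
print(st.get_option("theme.primaryColor"))        # expected: "#76b900"
print(st.get_option("browser.gatherUsageStats"))  # expected: False
```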
community/5_mins_rag_no_gpu/README.md
Lines changed: 12 additions & 18 deletions

````diff
@@ -1,17 +1,13 @@
-# RAG in 5 Minutes
+# Tutorial for a Generic RAG-Based Chatbot
 
-This implementation is tied to the [YouTube video on NVIDIA Developer](https://youtu.be/N_OOfkEWcOk).
+This tutorial shows how to build your own generic RAG chatbot. It is intended as a foundation for building more complex, domain-specific RAG bots. Note that no GPU is needed to run it, because it uses NIM microservices from the NVIDIA API Catalog.
 
-This is a simple standalone implementation showing a minimal RAG pipeline that uses models available from [NVIDIA API Catalog](https://catalog.ngc.nvidia.com/ai-foundation-models).
-The catalog enables you to experience state-of-the-art LLMs accelerated by NVIDIA.
-Developers get free credits for 10K requests to any of the models.
+## Acknowledgements
 
-The example uses an [integration package to LangChain](https://python.langchain.com/docs/integrations/providers/nvidia) to access the models.
-NVIDIA engineers develop, test, and maintain the open source integration.
-This example uses a simple [Streamlit](https://streamlit.io/) based user interface and has a one-file implementation.
-Because the example uses the models from the NVIDIA API Catalog, you do not need a GPU to run the example.
+- This implementation is based on [RAG in 5 Minutes](https://github.com/NVIDIA/GenerativeAIExamples/tree/4e86d75c813bcc41d4e92e430019053920d08c94/community/5_mins_rag_no_gpu), with changes primarily made to the UI.
+- Alyssa Sawyer also contributed to updating and further developing this repo during her intern project, [Resume RAG Bot](https://github.com/alysawyer/resume-rag-nv), at NVIDIA.
 
-### Steps
+## Steps
 
 1. Create a python virtual environment and activate it:
 
@@ -20,10 +16,10 @@ Because the example uses the models from the NVIDIA API Catalog, you do not need
    source genai/bin/activate
    ```
 
-1. From the root of this repository, `GenerativeAIExamples`, install the requirements:
+1. From the root of this repository, install the requirements:
 
    ```console
-   pip install -r community/5_mins_rag_no_gpu/requirements.txt
+   pip install -r requirements.txt
    ```
 
 1. Add your NVIDIA API key as an environment variable:
@@ -32,17 +28,15 @@ Because the example uses the models from the NVIDIA API Catalog, you do not need
    export NVIDIA_API_KEY="nvapi-*"
    ```
 
-   If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select on any model, then click on `Get API Key`.
+   If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select any model, then click `Get API Key`.
 
 1. Run the example using Streamlit:
 
    ```console
-   streamlit run community/5_mins_rag_no_gpu/main.py
+   streamlit run main.py
    ```
 
 1. Test the deployed example by going to `http://<host_ip>:8501` in a web browser.
 
-   Click **Browse Files** and select your knowledge source.
-   After selecting, click **Upload!** to complete the ingestion process.
-
-   You are all set now! Try out queries related to the knowledge base using text from the user interface.
+   Click **Browse Files** and select the documents for your knowledge base.
+   After selecting, click **Upload!** to complete the ingestion process.
````
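The steps above assume `NVIDIA_API_KEY` is exported before the app launches. A small pre-flight check (a hypothetical snippet, not part of the commit) can catch a missing or malformed key early:

```python
import os
import sys

# The NVIDIA API Catalog issues keys prefixed with "nvapi-"; fail fast if absent.
key = os.environ.get("NVIDIA_API_KEY", "")
if not key.startswith("nvapi-"):
    sys.exit("NVIDIA_API_KEY is missing or malformed; export it before running Streamlit.")
```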

community/5_mins_rag_no_gpu/main.py

Lines changed: 93 additions & 27 deletions
```diff
@@ -13,110 +13,176 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This is a simple standalone implementation showing rag pipeline using Nvidia AI Foundational models.
+# This is a simple standalone implementation showing a RAG pipeline using NVIDIA AI Foundation Models.
 # It uses a simple Streamlit UI and one file implementation of a minimalistic RAG pipeline.
 
+
+############################################
+# Component #0.5 - UI / Header
+############################################
+
 import streamlit as st
 import os
-from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
-from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.document_loaders import DirectoryLoader
-from langchain_community.vectorstores import FAISS
-import pickle
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
 
-st.set_page_config(layout="wide")
+# Page settings
+st.set_page_config(
+    layout="wide",
+    page_title="RAG Chatbot",
+    page_icon="🤖",
+    initial_sidebar_state="expanded")
+
+# Page title
+st.header('Generic RAG Chatbot Demo 🤖📝', divider='rainbow')
+
+# Custom CSS
+def local_css(file_name):
+    with open(file_name, "r") as f:
+        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
+local_css("style.css")
+
+# Page description
+st.markdown('''Manually looking through vast amounts of data can be tedious and time-consuming. This chatbot can expedite that process by providing a platform to query your documents.''')
+st.warning("This is a proof of concept, and any output from the AI agent should be used in conjunction with the original data.", icon="⚠️")
+
+############################################
+# Component #1 - Document Loader
+############################################
 
-# Component #1 - Document Upload
 with st.sidebar:
+    st.subheader("Upload Your Documents")
+
     DOCS_DIR = os.path.abspath("./uploaded_docs")
+
+    # Make dir to store uploaded documents
     if not os.path.exists(DOCS_DIR):
         os.makedirs(DOCS_DIR)
+
+    # Define form on Streamlit page for uploading files to KB
     st.subheader("Add to the Knowledge Base")
     with st.form("my-form", clear_on_submit=True):
         uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files=True)
         submitted = st.form_submit_button("Upload!")
 
+# Acknowledge successful file uploads
 if uploaded_files and submitted:
     for uploaded_file in uploaded_files:
         st.success(f"File {uploaded_file.name} uploaded successfully!")
         with open(os.path.join(DOCS_DIR, uploaded_file.name), "wb") as f:
             f.write(uploaded_file.read())
 
-# Component #2 - Embedding Model and LLM
-llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
-document_embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage")
+############################################
+# Component #2 - Initializing Embedding Model and LLM
+############################################
 
+from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
+
+# Make sure to export your NGC NV-Developer API key as NVIDIA_API_KEY!
+API_KEY = os.environ['NVIDIA_API_KEY']
+
+# Select embedding model and LLM
+document_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", api_key=API_KEY, model_type="passage", truncate="END")
+llm = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=API_KEY, temperature=0)
+
+############################################
 # Component #3 - Vector Database Store
+############################################
+
+import pickle
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import DirectoryLoader
+from langchain_community.vectorstores import FAISS
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.retrievers import BaseRetriever
+
+# Option for using an existing vector store
 with st.sidebar:
     use_existing_vector_store = st.radio("Use existing vector store if available", ["Yes", "No"], horizontal=True)
 
-vector_store_path = "vectorstore.pkl"
+# Load raw documents from the directory
+DOCS_DIR = os.path.abspath("./uploaded_docs")
 raw_documents = DirectoryLoader(DOCS_DIR).load()
 
+# Check for existing vector store file
+vector_store_path = "vectorstore.pkl"
 vector_store_exists = os.path.exists(vector_store_path)
 vectorstore = None
+
 if use_existing_vector_store == "Yes" and vector_store_exists:
+    # Load existing vector store
     with open(vector_store_path, "rb") as f:
         vectorstore = pickle.load(f)
     with st.sidebar:
-        st.success("Existing vector store loaded successfully.")
+        st.info("Existing vector store loaded successfully.")
 else:
     with st.sidebar:
         if raw_documents and use_existing_vector_store == "Yes":
+            # Chunk documents
             with st.spinner("Splitting documents into chunks..."):
-                text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=200)
+                text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=100)
                 documents = text_splitter.split_documents(raw_documents)
 
+            # Convert document chunks to embeddings, and save in a vector store
             with st.spinner("Adding document chunks to vector database..."):
                 vectorstore = FAISS.from_documents(documents, document_embedder)
 
+            # Save vector store
             with st.spinner("Saving vector store"):
                 with open(vector_store_path, "wb") as f:
                     pickle.dump(vectorstore, f)
             st.success("Vector store created and saved.")
         else:
             st.warning("No documents available to process!", icon="⚠️")
 
+############################################
 # Component #4 - LLM Response Generation and Chat
-st.subheader("Chat with your AI Assistant, Envie!")
+############################################
+
+st.subheader("Query your data")
 
+# Save chat history for this user session
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
+# Define prompt for LLM
 prompt_template = ChatPromptTemplate.from_messages([
-    ("system", "You are a helpful AI assistant named Envie. If provided with context, use it to inform your responses. If no context is available, use your general knowledge to provide a helpful response."),
+    ("system", "You are a helpful AI assistant. Use the provided context to inform your responses. If no context is available, please state that."),
     ("human", "{input}")
 ])
 
+# Define simple prompt chain
 chain = prompt_template | llm | StrOutputParser()
 
-user_input = st.chat_input("Can you tell me what NVIDIA is known for?")
+# Display an example query for user
+user_query = st.chat_input("Please summarize these documents.")
 
-if user_input:
-    st.session_state.messages.append({"role": "user", "content": user_input})
+if user_query:
+    st.session_state.messages.append({"role": "user", "content": user_query})
     with st.chat_message("user"):
-        st.markdown(user_input)
+        st.markdown(user_query)
 
     with st.chat_message("assistant"):
         message_placeholder = st.empty()
         full_response = ""
 
         if vectorstore is not None and use_existing_vector_store == "Yes":
+            # Retrieve relevant chunks for the given user query from the vector store
             retriever = vectorstore.as_retriever()
-            docs = retriever.invoke(user_input)
-            context = "\n\n".join([doc.page_content for doc in docs])
-            augmented_user_input = f"Context: {context}\n\nQuestion: {user_input}\n"
+            retrieved_docs = retriever.invoke(user_query)
+
+            # Concatenate retrieved chunks together as context for LLM
+            context = "\n\n".join([doc.page_content for doc in retrieved_docs])
+            augmented_user_input = f"Context: {context}\n\nQuestion: {user_query}\n"
         else:
-            augmented_user_input = f"Question: {user_input}\n"
+            augmented_user_input = f"Question: {user_query}\n"
 
+        # Get output from LLM
         for response in chain.stream({"input": augmented_user_input}):
             full_response += response
             message_placeholder.markdown(full_response + "▌")
         message_placeholder.markdown(full_response)
-    st.session_state.messages.append({"role": "assistant", "content": full_response})
+    st.session_state.messages.append({"role": "assistant", "content": full_response})
```
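For reference, the retrieval flow that main.py now implements can also be exercised without the Streamlit layer. Below is a minimal sketch (not part of the commit) of the same load → chunk → embed → retrieve → generate pipeline, assuming `NVIDIA_API_KEY` is exported and `./uploaded_docs` contains at least one document:

```python
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

# Load and chunk documents, mirroring Component #3 above.
raw_documents = DirectoryLoader(os.path.abspath("./uploaded_docs")).load()
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=100)
documents = splitter.split_documents(raw_documents)

# Embed chunks into an in-memory FAISS index (Components #2 and #3).
document_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type="passage", truncate="END")
vectorstore = FAISS.from_documents(documents, document_embedder)

# Build the same prompt | llm | parser chain as Component #4.
llm = ChatNVIDIA(model="meta/llama3-70b-instruct", temperature=0)
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the provided context to inform your responses."),
    ("human", "{input}"),
])
chain = prompt | llm | StrOutputParser()

# Retrieve context for a query and generate an answer.
query = "Please summarize these documents."
docs = vectorstore.as_retriever().invoke(query)
context = "\n\n".join(doc.page_content for doc in docs)
print(chain.invoke({"input": f"Context: {context}\n\nQuestion: {query}\n"}))
```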
community/5_mins_rag_no_gpu/requirements.txt
Lines changed: 10 additions & 2 deletions

```diff
@@ -1,5 +1,13 @@
-streamlit==1.30.0
+streamlit
 faiss-cpu==1.7.4
-langchain==0.1.20
 unstructured[all-docs]==0.11.2
+langchain
+langchain-community
+langchain-core
 langchain-nvidia-ai-endpoints
+langchain-text-splitters
+nltk==3.8.1
+numpy==1.23.5
+onnx==1.16.1
+onnxruntime==1.15.1
+python-magic
```
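Because several packages are now unpinned, the resolved versions will drift over time. A small helper (hypothetical, not part of the commit) can record what actually got installed, which helps reproduce a working environment later:

```python
from importlib.metadata import PackageNotFoundError, version

# Print the resolved version of each unpinned dependency for reproducibility.
for pkg in ("streamlit", "langchain", "langchain-community", "langchain-core",
            "langchain-nvidia-ai-endpoints", "langchain-text-splitters", "python-magic"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```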
community/5_mins_rag_no_gpu/style.css
Lines changed: 73 additions & 0 deletions

```diff
@@ -0,0 +1,73 @@
+/* style.css */
+
+/* custom footer */
+.footer {
+    text-align: center;
+    color: #666;
+    font-size: 14px;
+}
+
+/* NVIDIA green for headers */
+h1, h2, h3, h4, h5 {
+    color: #76b900;
+}
+
+
+/* add line when hovering over link */
+.hover-link {
+    text-decoration: none;
+    color: inherit;
+    position: relative;
+}
+
+.hover-link::after {
+    content: '';
+    position: absolute;
+    width: 100%;
+    height: 1px;
+    bottom: 0;
+    left: 0;
+    background-color: #000;
+    transform: scaleX(0);
+    transition: transform 0.3s ease-in-out;
+}
+
+.hover-link:hover::after {
+    transform: scaleX(1);
+}
+
+/* Remove default formatting for links */
+a {
+    color: #666;
+    text-decoration: none;
+}
+
+/* Remove streamlit bar */
+header {
+    visibility: hidden;
+}
+
+/* custom container */
+
+.custom-image-container img {
+    border-radius: 10px;
+}
+
+.custom-column-container {
+    background-color: #f0f0f0;
+    border-radius: 10px;
+    padding: 20px;
+}
+
+.custom-column-container .stMarkdown {
+    padding-right: 20px;
+}
+
+.streamlit-expanderHeader {
+    background-color: white;
+    color: #76b900;
+}
+.streamlit-expanderContent {
+    background-color: white;
+    color: black;
+}
```
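These rules only take effect once the stylesheet is injected into the page, which main.py does through its `local_css("style.css")` helper. For example, a hypothetical footer using the `.footer` class could then be rendered with:

```python
import streamlit as st

# Render HTML styled by the .footer rule in style.css
# (assumes local_css("style.css") has already injected the stylesheet).
st.markdown('<div class="footer">Generic RAG Chatbot Demo</div>', unsafe_allow_html=True)
```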
