Streamlit PDF Application Setup All Commands in One Single File
Streamlit PDF Application Setup All Commands in One Single File
==========================
COMPLETE STEP BY STEP GUIDE FOR PREPARING STREAMLIT APPLICATION FOR PDF FILES
===================================================================================
==========================
Step 1:
~~~~~~~
Step 2:
~~~~~~~
sqlplus baskar/WElcome__123@VECTORDB
Step 3:
~~~~~~~
-- Grant the database user 'baskar' the 'connect' network privilege.
-- NOTE(review): host => '*' applies the ACE to every host; narrow it if only
-- the OCI Generative AI endpoint needs to be reachable.
DECLARE
  l_connect_ace xs$ace_type :=
    xs$ace_type(
      privilege_list => xs$name_list('connect'),
      principal_name => 'baskar',
      principal_type => xs_acl.ptype_db
    );
BEGIN
  DBMS_NETWORK_ACL_ADMIN.APPEND_HOST_ACE(
    host => '*',
    ace  => l_connect_ace
  );
END;
/
Step 4:
~~~~~~~
-- Drop any earlier copy of the credential so this step can be re-run.
-- The name must match the credential created in the next block
-- (OCI_GENAI_BASKAR_CRED); otherwise the old credential is never removed.
BEGIN
  DBMS_VECTOR.DROP_CREDENTIAL('OCI_GENAI_BASKAR_CRED');
EXCEPTION
  -- Deliberate best-effort: on a first run there is nothing to drop yet.
  WHEN OTHERS THEN
    NULL;
END;
/
-- Create the OCI Generative AI credential used by the later steps.
-- Fill in the five empty values below with your own OCI details before running.
DECLARE
  jo json_object_t;
BEGIN
  jo := json_object_t();
  jo.put('user_ocid', '');
  jo.put('tenancy_ocid', '');
  jo.put('compartment_ocid', '');
  jo.put('private_key', '');
  jo.put('fingerprint', '');
  -- The JSON is intentionally not printed with DBMS_OUTPUT: it contains the
  -- private key, which would otherwise land in session output and spool files.
  dbms_vector.create_credential(
    credential_name => 'OCI_GENAI_BASKAR_CRED',
    params          => json(jo.to_string));
END;
/
Step 5:
~~~~~~~
cd /home/oracle/
vi .env
COMPARTMENT_OCID=ocid1.compartment.oc1..aaaaaaaa4e4qshiempprixk6pvgdk26txt5dy4dhtipiwjjkmrre4wozi6lq
[DATABASE]
username=baskar
password=WElcome__123
dsn=localhost:1521/freepdb1
Step 6:
~~~~~~~
sqlplus baskar/WElcome__123@VECTORDB
Step 7:
~~~~~~
Step 8:
~~~~~~~
# Perform the Vectorization for the PDF files and store it in VECTOR_STORE table
Step 9:
~~~~~~~
-- Embed the sample question with the doc_model embedding model, then return
-- the four VECTOR_STORE rows closest to it by cosine distance.
WITH query_vector AS (
    SELECT VECTOR_EMBEDDING(doc_model USING 'What is Oracle Analytics?' AS data) AS embedding
)
SELECT
    vs.embed_id,
    vs.embed_data
FROM vector_store vs
CROSS JOIN query_vector qv
ORDER BY VECTOR_DISTANCE(vs.embed_vector, qv.embedding, COSINE)
FETCH FIRST 4 ROWS ONLY;
Step 10:
~~~~~~~~
# Create a function that accepts the user's question and performs a similarity search against the existing PDF files.
# This function will be called in streamlit python program
-- SQL*Plus session settings applied before the function is created.
-- NOTE(review): UNDEFINE is given no substitution-variable name here; confirm what was intended.
UNDEFINE
-- Display DBMS_OUTPUT text produced by PL/SQL.
SET SERVEROUTPUT ON;
-- Echo each command as it is run.
SET ECHO ON
-- Report the row count whenever a query returns at least one row.
SET FEEDBACK 1
-- Default display width for numbers.
SET NUMWIDTH 10
-- Characters per output line.
SET LINESIZE 80
-- Strip trailing blanks from spooled lines.
SET TRIMSPOOL ON
-- Use spaces rather than tab characters in output.
SET TAB OFF
-- Lines per page; large so long results are not broken up by page headings.
SET PAGESIZE 10000
-- Maximum number of characters shown for CLOB/LONG values.
SET LONG 10000
-- generate_text_response_gen
--   Answers a question from the stored chunks of one document.
--   Parameters:
--     user_question  the question text; it is embedded with doc_model.
--     docid          value matched against VECTOR_STORE.doc_id.
--   Returns:
--     the CLOB produced by DBMS_VECTOR_CHAIN.UTL_TO_SUMMARY for the prompt.
CREATE OR REPLACE FUNCTION generate_text_response_gen(
    user_question IN VARCHAR2,
    docid         NUMBER
) RETURN CLOB IS
    user_question_vec VECTOR;
    oci_genai_params  CLOB;
    context           CLOB;
    prompt            CLOB;
    response          CLOB;
BEGIN
    -- Embed the question with the same model used in the Step 9 query.
    SELECT TO_VECTOR(VECTOR_EMBEDDING(doc_model USING user_question AS data)) AS embedding
      INTO user_question_vec;
    -- Concatenate the four chunks of this document closest to the question.
    context := '';
    FOR rec IN (
        SELECT embed_data
        FROM vector_store
        WHERE doc_id = docid
        ORDER BY VECTOR_DISTANCE(embed_vector, user_question_vec, COSINE)
        FETCH FIRST 4 ROWS ONLY
    )
    LOOP
        context := context || rec.embed_data;
    END LOOP;
    prompt := 'Answer the following question using the supplied context: '
           || user_question || ' Context: ' || context;
    -- Strip any trailing commas and newlines from the prompt.
    prompt := RTRIM(prompt, ',' || CHR(10));
    -- Built from single-line pieces: a line break inside the literal would put
    -- a raw newline in the endpoint URL and in the JSON string value.
    oci_genai_params :=
           '{"provider": "ocigenai", '
        || '"credential_name": "OCI_GENAI_BASKAR_CRED", '
        || '"url": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/summarizeText", '
        || '"model": "cohere.command"}';
    response := DBMS_VECTOR_CHAIN.UTL_TO_SUMMARY(prompt, json(oci_genai_params));
    RETURN response;
END;
/
Test the Response Output by passing the question and pdf docid to this Function:
Step 11:
~~~~~~~~
# Create a procedure that accepts a new PDF file from the user and stores it in the MY_BOOKS table.
# This procedure will be called in streamlit python program
Step 12:
~~~~~~~~
# Create a trigger that vectorizes each newly uploaded PDF file and stores the vector embeddings in the VECTOR_STORE table.
# This trigger will be called in streamlit python program
Step 13:
~~~~~~~~~
cd /home/oracle
vi pdf_vector_application.py
import oracledb
from dotenv import load_dotenv
import os
import requests
import time
from PyPDF2 import PdfReader
import streamlit as st
def call_insert_my_table_row(conn23c, pdf_name, pdf_size, pdf_type, pdf_content):
    """Call the ``insert_my_table_row`` stored procedure and return its OUT value.

    Args:
        conn23c: Open database connection; used to create the cursor and commit.
        pdf_name: Passed to the procedure as its first argument.
        pdf_size: Passed to the procedure as its second argument.
        pdf_type: Passed to the procedure as its third argument.
        pdf_content: Passed to the procedure as its fourth argument.

    Returns:
        The value of the procedure's final (NUMBER) bind variable after the
        call, or ``None`` when a database error occurs.
    """
    # Start as None so the finally clause stays safe when cursor() itself fails;
    # otherwise the cleanup would raise UnboundLocalError and hide the real error.
    cursor = None
    try:
        cursor = conn23c.cursor()
        new_id = cursor.var(oracledb.NUMBER)
        cursor.callproc(
            "insert_my_table_row",
            [pdf_name, pdf_size, pdf_type, pdf_content, new_id],
        )
        conn23c.commit()
        print("Procedure executed successfully!" + str(new_id.getvalue()))
        return new_id.getvalue()
    except oracledb.DatabaseError as e:
        # Tolerate exceptions whose args are empty or carry no .message attribute.
        error = e.args[0] if e.args else e
        print("Error:", getattr(error, "message", str(error)))
        return None
    finally:
        if cursor is not None:
            cursor.close()
def main():
    """Render the page header and open the database connection.

    Connection settings are read from the environment variables ``username``,
    ``password`` and ``dsn`` after ``load_dotenv()`` has run.
    """
    load_dotenv()
    st.set_page_config(page_title="Ask Question Based on PDF")
    st.info("Oracle AI Vector Search with OCI GenAI LLM")
    st.header("Ask your question to get answers based on your pdf")
    username = os.getenv("username")
    password = os.getenv("password")
    dsn = os.getenv("dsn")
    try:
        conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
        print("Connection successful!")
    except Exception as e:
        # Report the cause instead of discarding it, both on the console and
        # in the page, so a bad .env or unreachable database is diagnosable.
        print("Connection failed!", e)
        st.error(f"Database connection failed: {e}")


if __name__ == '__main__':
    main()
Step 14:
~~~~~~~~~
# Open VNC Viewer and login to Oracle OS user.
# Invoke the Python Program with streamlit command.
cd /home/oracle
streamlit run pdf_vector_application.py
Once Streamlit is running, open the Mozilla browser inside the VNC Viewer.
Then, in the browser, go to:
localhost:8501/
Note:
The "Oracle_Analytics_Server.pdf" file has already been uploaded as Doc ID 1, so upload different documents.
Step 15:
~~~~~~~~
# Cleanup the schema and files