diff --git a/CHANGELOG.md b/CHANGELOG.md index af30477c2..460da65a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,28 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.0] - 2024-02-22 + +### Added + +- [New dedicated notebooks](./docs/rag/jupyter_server.md) showcasing usage of cloud-based NVIDIA AI Playground models using LangChain connectors as well as local model deployment using Hugging Face. +- Upgraded Milvus container version to enable GPU-accelerated vector search. +- Added support to interact with models behind NeMo Inference Microservices using new model engines `nemo-embed` and `nemo-infer`. +- Added support to provide an example-specific collection name for vector databases using an environment variable named `COLLECTION_NAME`. +- Added `faiss` as a generic vector database solution in `utils.py`. + +### Changed + +- Upgraded and changed base containers for all components to PyTorch `23.12-py3`. +- Added a LangChain-specific vector database connector in `utils.py`. +- Changed speech support to use a single channel for Riva ASR and TTS. +- Changed the `get_llm` utility in `utils.py` to return a LangChain wrapper instead of a LlamaIndex wrapper. + +### Fixed + +- Fixed a bug causing empty ratings in the evaluation notebook. +- Fixed the document search implementation of the query decomposition example. + ## [0.3.0] - 2024-01-22 ### Added @@ -53,4 +75,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - [Fixed issue #13](https://github.com/NVIDIA/GenerativeAIExamples/issues/13) of pipeline not able to answer questions unrelated to knowledge base -- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files \ No newline at end of file +- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files diff --git a/RetrievalAugmentedGeneration/Dockerfile b/RetrievalAugmentedGeneration/Dockerfile index 205785592..78a278313 100644 --- a/RetrievalAugmentedGeneration/Dockerfile +++ b/RetrievalAugmentedGeneration/Dockerfile @@ -1,22 +1,24 @@ ARG BASE_IMAGE_URL=nvcr.io/nvidia/pytorch -ARG BASE_IMAGE_TAG=23.08-py3 +ARG BASE_IMAGE_TAG=23.12-py3 FROM ${BASE_IMAGE_URL}:${BASE_IMAGE_TAG} ARG EXAMPLE_NAME COPY RetrievalAugmentedGeneration/__init__.py /opt/RetrievalAugmentedGeneration/ COPY RetrievalAugmentedGeneration/common /opt/RetrievalAugmentedGeneration/common -COPY RetrievalAugmentedGeneration/examples/${EXAMPLE_NAME} /opt/RetrievalAugmentedGeneration/example COPY integrations /opt/integrations COPY tools /opt/tools RUN apt-get update && apt-get install -y libpq-dev RUN --mount=type=bind,source=RetrievalAugmentedGeneration/requirements.txt,target=/opt/requirements.txt \ python3 -m pip install --no-cache-dir -r /opt/requirements.txt +COPY RetrievalAugmentedGeneration/examples/${EXAMPLE_NAME} /opt/RetrievalAugmentedGeneration/example RUN if [ -f "/opt/RetrievalAugmentedGeneration/example/requirements.txt" ] ; then \ python3 -m pip install --no-cache-dir -r /opt/RetrievalAugmentedGeneration/example/requirements.txt ; else \ echo "Skipping example dependency installation, since requirements.txt was not found" ; \ fi +RUN apt-get remove python3-pip + WORKDIR /opt ENTRYPOINT ["uvicorn", "RetrievalAugmentedGeneration.common.server:app"] diff --git
a/RetrievalAugmentedGeneration/common/configuration.py b/RetrievalAugmentedGeneration/common/configuration.py index 238829634..bd80de789 100644 --- a/RetrievalAugmentedGeneration/common/configuration.py +++ b/RetrievalAugmentedGeneration/common/configuration.py @@ -114,6 +114,11 @@ class EmbeddingConfig(ConfigWizard): default=1024, help_txt="The required dimensions of the embedding model. Currently utilized for vector DB indexing.", ) + server_url: str = configfield( + "server_url", + default="localhost:9080", + help_txt="The url of the server hosting nemo embedding model", + ) @configclass diff --git a/RetrievalAugmentedGeneration/common/utils.py b/RetrievalAugmentedGeneration/common/utils.py index 13820ba27..b47aa965d 100644 --- a/RetrievalAugmentedGeneration/common/utils.py +++ b/RetrievalAugmentedGeneration/common/utils.py @@ -18,6 +18,7 @@ import base64 import logging from functools import lru_cache +from urllib.parse import urlparse from typing import TYPE_CHECKING, List, Optional logger = logging.getLogger(__name__) @@ -33,7 +34,7 @@ logger.error(f"psycogp2 import failed with error: {e}") try: - from sqlalchemy import make_url + from sqlalchemy.engine.url import make_url except Exception as e: logger.error(f"SQLalchemy import failed with error: {e}") @@ -55,15 +56,33 @@ try: from langchain.text_splitter import SentenceTransformersTokenTextSplitter from langchain.embeddings import HuggingFaceEmbeddings + from langchain.vectorstores import FAISS except Exception as e: logger.error(f"Langchain import failed with error: {e}") +try: + from langchain_core.vectorstores import VectorStore +except Exception as e: + logger.error(f"Langchain core import failed with error: {e}") + +try: + from langchain_community.vectorstores import PGVector + from langchain_community.vectorstores import Milvus +except Exception as e: + logger.error(f"Langchain community import failed with error: {e}") + try: from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings + from langchain_community.chat_models import ChatOpenAI except Exception as e: logger.error(f"NVIDIA AI connector import failed with error: {e}") +from langchain_core.embeddings import Embeddings +from langchain_core.language_models.chat_models import SimpleChatModel +from langchain.llms.base import LLM from integrations.langchain.llms.triton_trt_llm import TensorRTLLM +from integrations.langchain.llms.nemo_infer import NemoInfer +from integrations.langchain.embeddings.nemo_embed import NemoEmbeddings from RetrievalAugmentedGeneration.common import configuration if TYPE_CHECKING: @@ -102,8 +121,10 @@ def _postprocess_nodes( @lru_cache def set_service_context() -> None: """Set the global service context.""" + llm = LangChainLLM(get_llm()) + embedding = LangchainEmbedding(get_embedding_model()) service_context = ServiceContext.from_defaults( - llm=get_llm(), embed_model=get_embedding_model() + llm=llm, embed_model=embedding ) set_global_service_context(service_context) @@ -119,15 +140,19 @@ def get_config() -> "ConfigWizard": @lru_cache -def get_vector_index() -> VectorStoreIndex: +def get_vector_index(collection_name: str = "") -> VectorStoreIndex: """Create the vector db index.""" config = get_config() vector_store = None logger.info(f"Using {config.vector_store.name} as vector store") + if config.vector_store.name == "pgvector": - db_name = os.getenv('POSTGRES_DB', 'vector_db') + db_name = os.getenv('POSTGRES_DB', None) + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") connection_string = 
f"postgresql://{os.getenv('POSTGRES_USER', '')}:{os.getenv('POSTGRES_PASSWORD', '')}@{config.vector_store.url}/{db_name}" + logger.info(f"Using PGVector collection: {collection_name}") conn = psycopg2.connect(connection_string) conn.autocommit = True @@ -146,14 +171,17 @@ def get_vector_index() -> VectorStoreIndex: password=url.password, port=url.port, user=url.username, - table_name="document_store", - embed_dim=config.embeddings.dimensions, + table_name=collection_name, + embed_dim=config.embeddings.dimensions ) elif config.vector_store.name == "milvus": + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + logger.info(f"Using milvus collection: {collection_name}") vector_store = MilvusVectorStore(uri=config.vector_store.url, dim=config.embeddings.dimensions, - collection_name="document_store_ivfflat", - index_config={"index_type": "IVF_FLAT", "nlist": config.vector_store.nlist}, + collection_name=collection_name, + index_config={"index_type": "GPU_IVF_FLAT", "nlist": config.vector_store.nlist}, search_config={"nprobe": config.vector_store.nprobe}, overwrite=False) else: @@ -161,6 +189,42 @@ def get_vector_index() -> VectorStoreIndex: return VectorStoreIndex.from_vector_store(vector_store) +def get_vectorstore_langchain(documents, document_embedder, collection_name: str = "") -> VectorStore: + """Create the vector db index for langchain.""" + + config = get_config() + + if config.vector_store.name == "faiss": + vectorstore = FAISS.from_documents(documents, document_embedder) + elif config.vector_store.name == "pgvector": + db_name = os.getenv('POSTGRES_DB', None) + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + logger.info(f"Using PGVector collection: {collection_name}") + connection_string = f"postgresql://{os.getenv('POSTGRES_USER', '')}:{os.getenv('POSTGRES_PASSWORD', '')}@{config.vector_store.url}/{db_name}" + vectorstore = PGVector.from_documents( + embedding=document_embedder, + documents=documents, + collection_name=collection_name, + connection_string=connection_string, + ) + elif config.vector_store.name == "milvus": + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + logger.info(f"Using milvus collection: {collection_name}") + url = urlparse(config.vector_store.url) + vectorstore = Milvus.from_documents( + documents, + document_embedder, + collection_name=collection_name, + connection_args={"host": url.hostname, "port": url.port} + ) + else: + raise ValueError(f"{config.vector_store.name} vector database is not supported") + logger.info("Vector store created and saved.") + return vectorstore + + @lru_cache def get_doc_retriever(num_nodes: int = 4) -> "BaseRetriever": """Create the document retriever.""" @@ -169,7 +233,7 @@ def get_doc_retriever(num_nodes: int = 4) -> "BaseRetriever": @lru_cache -def get_llm() -> LangChainLLM: +def get_llm() -> LLM | SimpleChatModel: """Create the LLM connection.""" settings = get_config() @@ -180,15 +244,30 @@ def get_llm() -> LangChainLLM: model_name=settings.llm.model_name, tokens=DEFAULT_NUM_TOKENS, ) - return LangChainLLM(llm=trtllm) + return trtllm elif settings.llm.model_engine == "nv-ai-foundation": return ChatNVIDIA(model=settings.llm.model_name) + elif settings.llm.model_engine == "nemo-infer": + nemo_infer = NemoInfer( + server_url=f"http://{settings.llm.server_url}/v1/completions", + model=settings.llm.model_name, + tokens=DEFAULT_NUM_TOKENS, + ) + return nemo_infer + elif settings.llm.model_engine == 
"nemo-infer-openai": + nemo_infer = ChatOpenAI( + openai_api_base=f"http://{settings.llm.server_url}/v1/", + openai_api_key="xyz", + model_name=settings.llm.model_name, + max_tokens=DEFAULT_NUM_TOKENS, + ) + return nemo_infer else: raise RuntimeError("Unable to find any supported Large Language Model server. Supported engines are triton-trt-llm and nv-ai-foundation.") @lru_cache -def get_embedding_model() -> LangchainEmbedding: +def get_embedding_model() -> Embeddings: """Create the embedding model.""" model_kwargs = {"device": "cpu"} if torch.cuda.is_available(): @@ -205,9 +284,15 @@ def get_embedding_model() -> LangchainEmbedding: encode_kwargs=encode_kwargs, ) # Load in a specific embedding model - return LangchainEmbedding(hf_embeddings) + return hf_embeddings elif settings.embeddings.model_engine == "nv-ai-foundation": return NVIDIAEmbeddings(model=settings.embeddings.model_name, model_type="passage") + elif settings.embeddings.model_engine == "nemo-embed": + nemo_embed = NemoEmbeddings( + server_url=f"http://{settings.embeddings.server_url}/v1/embeddings", + model_name=settings.embeddings.model_name, + ) + return nemo_embed else: raise RuntimeError("Unable to find any supported embedding model. Supported engine is huggingface.") diff --git a/RetrievalAugmentedGeneration/examples/developer_rag/chains.py b/RetrievalAugmentedGeneration/examples/developer_rag/chains.py index 4c9cb8cc4..a3184bc08 100644 --- a/RetrievalAugmentedGeneration/examples/developer_rag/chains.py +++ b/RetrievalAugmentedGeneration/examples/developer_rag/chains.py @@ -24,6 +24,8 @@ from llama_index.query_engine import RetrieverQueryEngine from llama_index.response.schema import StreamingResponse from llama_index.node_parser import LangchainNodeParser +from llama_index.llms import LangChainLLM +from llama_index.embeddings import LangchainEmbedding from RetrievalAugmentedGeneration.common.utils import ( LimitRetrievedNodesLength, @@ -91,7 +93,8 @@ def llm_chain(self, context: str, question: str, num_tokens: int) -> Generator[s ) logger.info(f"Prompt used for response generation: {prompt}") - response = get_llm().stream_complete(prompt, tokens=num_tokens) + llm = LangChainLLM(get_llm()) + response = llm.stream_complete(prompt, tokens=num_tokens) gen_response = (resp.delta for resp in response) return gen_response @@ -101,10 +104,16 @@ def rag_chain(self, prompt: str, num_tokens: int) -> Generator[str, None, None]: logger.info("Using rag to generate response from document") set_service_context() - if get_config().llm.model_engine == "triton-trt-llm": - get_llm().llm.tokens = num_tokens # type: ignore - else: - get_llm().llm.max_tokens = num_tokens + llm = LangChainLLM(get_llm()) + + try: + if get_config().llm.model_engine == "triton-trt-llm" or get_config().llm.model_engine == "nemo-infer": + llm.llm.tokens = num_tokens # type: ignore + else: + llm.llm.max_tokens = num_tokens + except Exception as e: + logger.error(f"Exception in setting llm tokens: {e}") + retriever = get_doc_retriever(num_nodes=4) qa_template = Prompt(get_config().prompts.rag_template) diff --git a/RetrievalAugmentedGeneration/examples/nvidia_ai_foundation/chains.py b/RetrievalAugmentedGeneration/examples/nvidia_ai_foundation/chains.py index 82886fbbd..251208f8b 100644 --- a/RetrievalAugmentedGeneration/examples/nvidia_ai_foundation/chains.py +++ b/RetrievalAugmentedGeneration/examples/nvidia_ai_foundation/chains.py @@ -25,7 +25,7 @@ from langchain_core.prompts import ChatPromptTemplate from langchain_nvidia_ai_endpoints import ChatNVIDIA, 
NVIDIAEmbeddings from RetrievalAugmentedGeneration.common.base import BaseExample -from RetrievalAugmentedGeneration.common.utils import get_config, get_llm, get_embedding_model +from RetrievalAugmentedGeneration.common.utils import get_config, get_llm, get_embedding_model, get_vectorstore_langchain logger = logging.getLogger(__name__) DOCS_DIR = os.path.abspath("./uploaded_files") @@ -38,7 +38,6 @@ class NvidiaAIFoundation(BaseExample): def ingest_docs(self, file_name: str, filename: str): """Ingest documents to the VectorDB.""" - try: # TODO: Load embedding created in older conversation, memory persistance # We initialize class in every call therefore it should be global @@ -54,8 +53,7 @@ def ingest_docs(self, file_name: str, filename: str): if vectorstore: vectorstore.add_documents(documents) else: - vectorstore = FAISS.from_documents(documents, document_embedder) - logger.info("Vector store created and saved.") + vectorstore = get_vectorstore_langchain(documents, document_embedder) else: logger.warning("No documents available to process!") except Exception as e: @@ -106,8 +104,14 @@ def rag_chain(self, prompt: str, num_tokens: int) -> Generator[str, None, None]: try: if vectorstore != None: - retriever = vectorstore.as_retriever() - docs = retriever.get_relevant_documents(prompt) + try: + retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.25}) + docs = retriever.get_relevant_documents(prompt) + except NotImplementedError: + # Some retriever like milvus don't have similarity score threshold implemented + retriever = vectorstore.as_retriever() + docs = retriever.get_relevant_documents(prompt) + context = "" for doc in docs: @@ -134,8 +138,14 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: try: if vectorstore != None: - retriever = vectorstore.as_retriever() - docs = retriever.get_relevant_documents(content) + try: + retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.25}) + docs = retriever.get_relevant_documents(content) + except NotImplementedError: + # Some retriever like milvus don't have similarity score threshold implemented + retriever = vectorstore.as_retriever() + docs = retriever.get_relevant_documents(content) + result = [] for doc in docs: result.append( diff --git a/RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py b/RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py index 5d1a63fc3..c77b620cf 100644 --- a/RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py +++ b/RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py @@ -34,8 +34,7 @@ import json import jinja2 import os - -import os +import base64 import logging from typing import Generator, List @@ -44,6 +43,8 @@ get_llm, set_service_context, get_embedding_model, + get_doc_retriever, + get_vectorstore_langchain, ) from RetrievalAugmentedGeneration.common.base import BaseExample @@ -166,7 +167,7 @@ def ingest_docs(self, file_name: str, filename: str): if vectorstore: vectorstore.add_documents(documents) else: - vectorstore = FAISS.from_documents(documents, document_embedder) + vectorstore = get_vectorstore_langchain(documents, document_embedder) logger.info("Vector store created and saved.") else: logger.warning("No documents available to process!") @@ -191,8 +192,6 @@ def llm_chain( ] ) - llm = get_llm() - chain = prompt_template | llm | StrOutputParser() augmented_user_input = ( 
"Context: " + context + "\n\nQuestion: " + question + "\n" @@ -325,17 +324,20 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: """Search for the most relevant documents for the given search parameters.""" try: - retriever = get_doc_retriever(num_nodes=num_docs) - nodes = retriever.retrieve(content) - output = [] - for node in nodes: - file_name = nodes[0].metadata["filename"] - decoded_filename = base64.b64decode(file_name.encode("utf-8")).decode("utf-8") - entry = {"score": node.score, "source": decoded_filename, "content": node.text} - output.append(entry) - - return output - + if vectorstore != None: + retriever = vectorstore.as_retriever() + docs = retriever.get_relevant_documents(content) + + result = [] + for doc in docs: + result.append( + { + "source": os.path.basename(doc.metadata.get('source', '')), + "content": doc.page_content + } + ) + return result + return [] except Exception as e: logger.error(f"Error from /documentSearch endpoint. Error details: {e}") return [] diff --git a/RetrievalAugmentedGeneration/frontend/Dockerfile b/RetrievalAugmentedGeneration/frontend/Dockerfile index 5f8192db0..a0fda78d7 100644 --- a/RetrievalAugmentedGeneration/frontend/Dockerfile +++ b/RetrievalAugmentedGeneration/frontend/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/library/python:3.11-slim +FROM nvcr.io/nvidia/pytorch:23.12-py3 RUN mkdir /app COPY requirements.txt /app @@ -7,6 +7,8 @@ RUN apt-get update; \ python3 -m pip --no-cache-dir install -r /app/requirements.txt; \ python3 -m pip --no-cache-dir install nvidia-riva-client==2.14.0; \ apt-get clean + +RUN apt-get remove python3-pip USER 1001 COPY frontend /app/frontend diff --git a/RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py b/RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py index a15aefed2..3fb64d71d 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py +++ b/RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py @@ -16,7 +16,6 @@ import queue from threading import Thread -import os import logging import grpc import pycountry @@ -25,7 +24,6 @@ import riva.client import riva.client.proto.riva_asr_pb2 as riva_asr import riva.client.proto.riva_asr_pb2_grpc as rasr_srv -from google.protobuf import text_format class ASRSession: def __init__(self): @@ -37,50 +35,32 @@ def __init__(self): _LOGGER = logging.getLogger(__name__) -# Extract environmental variables -RIVA_API_URI = os.getenv("RIVA_API_URI", None) -RIVA_API_KEY = os.getenv("RIVA_API_KEY", None) -RIVA_FUNCTION_ID = os.getenv("RIVA_FUNCTION_ID", None) - -# Establish a connection to the Riva server -try: - use_ssl = False - metadata = [] - auth = None - if RIVA_API_KEY: - use_ssl = True - metadata.append(("authorization", "Bearer " + RIVA_API_KEY)) - if RIVA_FUNCTION_ID: - use_ssl = True - metadata.append(("function-id", RIVA_FUNCTION_ID)) - auth = riva.client.Auth( - None, use_ssl=use_ssl, - uri=RIVA_API_URI, - metadata_args=metadata - ) - _LOGGER.info('Created riva.client.Auth success') -except: - _LOGGER.info('Error creating riva.client.Auth') - # Obtain the ASR languages available on the Riva server ASR_LANGS = dict() +grpc_auth = None -try: - _LOGGER.info("Available ASR languages") - asr_client = riva.client.ASRService(auth) - config_response = asr_client.stub.GetRivaSpeechRecognitionConfig(riva_asr.RivaSpeechRecognitionConfigRequest()) - for model_config in config_response.model_config: - if model_config.parameters["decoder_type"] and model_config.model_name.endswith("streaming"): - language_code = 
model_config.parameters['language_code'] - language_name = f"{pycountry.languages.get(alpha_2=language_code[:2]).name} ({language_code})" - _LOGGER.info(f"{language_name} {model_config.model_name}") - ASR_LANGS[language_name] = {"language_code": language_code, "model": model_config.model_name} -except: - ASR_LANGS["No ASR languages available"] = "No ASR languages available" - gr.Info('The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.') - _LOGGER.info('The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.') - -ASR_LANGS = dict(sorted(ASR_LANGS.items())) +def asr_init(auth): + global ASR_LANGS + global grpc_auth + grpc_auth = auth + try: + _LOGGER.info("Available ASR languages") + asr_client = riva.client.ASRService(grpc_auth) + + config_response = asr_client.stub.GetRivaSpeechRecognitionConfig(riva_asr.RivaSpeechRecognitionConfigRequest()) + for model_config in config_response.model_config: + if model_config.parameters["decoder_type"] and model_config.model_name.endswith("streaming"): + language_code = model_config.parameters['language_code'] + language_name = f"{pycountry.languages.get(alpha_2=language_code[:2]).name} ({language_code})" + _LOGGER.info(f"{language_name} {model_config.model_name}") + ASR_LANGS[language_name] = {"language_code": language_code, "model": model_config.model_name} + except grpc.RpcError as e: + _LOGGER.info(e.details()) + ASR_LANGS["No ASR languages available"] = "No ASR languages available" + gr.Info('The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.') + _LOGGER.info('The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.') + + ASR_LANGS = dict(sorted(ASR_LANGS.items())) def print_streaming_response(asr_session): asr_session.transcript = "" @@ -102,7 +82,7 @@ def print_streaming_response(asr_session): asr_session.transcript = final_transcript + partial except grpc.RpcError as rpc_error: - _LOGGER.error(rpc_error.code(), rpc_error.details()) + print(rpc_error.details()) # TODO See if Gradio popup error mechanism can be used. # For now whow error via transcript text box. asr_session.transcript = rpc_error.details() @@ -123,13 +103,8 @@ def stop_recording(asr_session): pass return asr_session -def transcribe_streaming(audio, language, asr_session, auth=auth): +def transcribe_streaming(audio, language, asr_session): _LOGGER.info('transcribe_streaming') - - if auth == None: - _LOGGER.info('Riva client did not initialize properly. Skipping transcription.') - return None, None - if language == 'No ASR languages available': gr.Info('The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.') _LOGGER.info('The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. 
Check that you are connected to a Riva server with ASR enabled.') @@ -159,8 +134,8 @@ def transcribe_streaming(audio, language, asr_session, auth=auth): interim_results=True, ) - _LOGGER.info(f'auth.channel = {auth.channel}') - rasr_stub = rasr_srv.RivaSpeechRecognitionStub(auth.channel) + rasr_stub = rasr_srv.RivaSpeechRecognitionStub(grpc_auth.channel) + asr_session.response_stream = rasr_stub.StreamingRecognize(iter(asr_session.request_queue.get, None)) # First buffer should contain only the config @@ -178,54 +153,3 @@ def transcribe_streaming(audio, language, asr_session, auth=auth): asr_session.request_queue.put(request) return asr_session.transcript, asr_session - -def transcribe_offline(audio, language, diarization, auth=auth): - _LOGGER.info('transcribe_offline') - - if auth == None: - _LOGGER.info('Riva client did not initialize properly. Skipping transcription.') - return None, None - - if language == 'No ASR languages available': - gr.Info('The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.') - _LOGGER.info('The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.') - return None, None - rate, data = audio - if len(data.shape) > 1: - data = np.mean(data, axis=1) - - if not len(data): - _LOGGER.info("Empty audio provided") - return None, None - - asr_dict = next((d for d in asr_config if d['asr_language_name'] == language), None) - - config = riva.client.RecognitionConfig( - encoding=riva.client.AudioEncoding.LINEAR_PCM, - sample_rate_hertz=rate, - audio_channel_count=1, - language_code=ASR_LANGS[language]['language_code'], - max_alternatives=1, - profanity_filter=False, - enable_automatic_punctuation=True, - verbatim_transcripts=False, - enable_word_time_offsets=True, - ) - riva.client.add_speaker_diarization_to_config(config, diarization) - - asr_client = riva.client.ASRService(auth) - try: - response = asr_client.offline_recognize(data.astype(np.int16).tobytes(), config) - if len(response.results) > 0 and len(response.results[0].alternatives) > 0: - final_transcript = "" - for res in response.results: - final_transcript += res.alternatives[0].transcript - return final_transcript, text_format.MessageToString(response, as_utf8=True) - except grpc.RpcError as rpc_error: - _LOGGER.info(f"{rpc_error.code()}, {rpc_error.details()}") - # TODO See if Gradio popup error mechanism can be used. - # For now whow error via transcript text box. 
- latest_transcript = rpc_error.details() - return latest_transcript, None - - return latest_transcript, None \ No newline at end of file diff --git a/RetrievalAugmentedGeneration/frontend/frontend/chat_client.py b/RetrievalAugmentedGeneration/frontend/frontend/chat_client.py index 0b95f9578..c853a07eb 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/chat_client.py +++ b/RetrievalAugmentedGeneration/frontend/frontend/chat_client.py @@ -85,7 +85,7 @@ def predict( ) try: - with requests.post(url, stream=True, json=data, timeout=30, headers=carrier) as req: + with requests.post(url, stream=True, json=data, timeout=50, headers=carrier) as req: req.raise_for_status() for chunk in req.iter_content(16): yield chunk.decode("UTF-8") diff --git a/RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py b/RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py index 414e6a872..54faf69fc 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py +++ b/RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py @@ -16,7 +16,9 @@ """This module contains the frontend gui for having a conversation.""" import functools import logging +import os from typing import Any, Dict, List, Tuple, Union +import riva.client import gradio as gr @@ -36,11 +38,37 @@ } """ +# Extract environmental variables +RIVA_API_URI = os.getenv("RIVA_API_URI", None) +RIVA_API_KEY = os.getenv("RIVA_API_KEY", None) +RIVA_FUNCTION_ID = os.getenv("RIVA_FUNCTION_ID", None) + +# Establish a connection to the Riva server +try: + use_ssl = False + metadata = [] + if RIVA_API_KEY: + use_ssl = True + metadata.append(("authorization", "Bearer " + RIVA_API_KEY)) + if RIVA_FUNCTION_ID: + use_ssl = True + metadata.append(("function-id", RIVA_FUNCTION_ID)) + auth = riva.client.Auth( + None, use_ssl=use_ssl, + uri=RIVA_API_URI, + metadata_args=metadata + ) + _LOGGER.info('Created riva.client.Auth success') +except: + _LOGGER.info('Error creating riva.client.Auth') def build_page(client: chat_client.ChatClient) -> gr.Blocks: """Build the gradio page to be mounted in the frame.""" kui_theme, kui_styles = assets.load_theme("kaizen") + asr_utils.asr_init(auth) + tts_utils.tts_init(auth) + with gr.Blocks(title=TITLE, theme=kui_theme, css=kui_styles + _LOCAL_CSS) as page: # session specific state across runs @@ -82,7 +110,7 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: tts_checkbox = gr.Checkbox( label="Enable TTS output", info="", value=False ) - + # dropdowns with gr.Accordion("ASR and TTS Settings"): with gr.Row(): @@ -102,7 +130,7 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: try: for model in tts_utils.TTS_MODELS: all_voices.extend(tts_utils.TTS_MODELS[model]['voices']) - default_voice = tts_utils.TTS_MODELS[tts_language_list[0]]['voices'][0] + default_voice = tts_utils.TTS_MODELS[tts_language_list[0]]['voices'][0] except: all_voices.append("No TTS voices available") default_voice = "No TTS voices available" @@ -173,9 +201,9 @@ def _toggle_context(btn: str) -> Dict[gr.component, Dict[Any, Any]]: ) tts_language_dropdown.change( - tts_utils.update_voice_dropdown, - [tts_language_dropdown], - [tts_voice_dropdown], + tts_utils.update_voice_dropdown, + [tts_language_dropdown], + [tts_voice_dropdown], api_name=False ) diff --git a/RetrievalAugmentedGeneration/frontend/frontend/tts_utils.py b/RetrievalAugmentedGeneration/frontend/frontend/tts_utils.py index ac65a0255..360e44613 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/tts_utils.py +++ 
b/RetrievalAugmentedGeneration/frontend/frontend/tts_utils.py @@ -28,61 +28,38 @@ _LOGGER = logging.getLogger(__name__) -# Extract environmental variables -RIVA_API_URI = os.getenv("RIVA_API_URI", None) -RIVA_API_KEY = os.getenv("RIVA_API_KEY", None) -RIVA_FUNCTION_ID = os.getenv("RIVA_FUNCTION_ID", None) - -try: - tts_sample_rate = int(os.getenv("TTS_SAMPLE_RATE", 48000)) -except Exception as e: - _LOGGER.info('TTS_SAMPLE_RATE is not set to an integer value. Defaulting to 48000.') - tts_sample_rate = 48000 - -# Establish a connection to the Riva server -try: - use_ssl = False - metadata = [] - auth = None - if RIVA_API_KEY: - use_ssl = True - metadata.append(("authorization", "Bearer " + RIVA_API_KEY)) - if RIVA_FUNCTION_ID: - use_ssl = True - metadata.append(("function-id", RIVA_FUNCTION_ID)) - auth = riva.client.Auth( - None, use_ssl=use_ssl, - uri=RIVA_API_URI, - metadata_args=metadata - ) - _LOGGER.info('Created riva.client.Auth success') -except: - _LOGGER.info('Error creating riva.client.Auth') +tts_sample_rate = int(os.getenv("TTS_SAMPLE_RATE", 48000)) # Obtain the TTS languages and voices available on the Riva server TTS_MODELS = dict() -try: - tts_client = riva.client.SpeechSynthesisService(auth) - config_response = tts_client.stub.GetRivaSynthesisConfig(riva_tts.RivaSynthesisConfigRequest()) - for model_config in config_response.model_config: - language_code = model_config.parameters['language_code'] - language_name = f"{pycountry.languages.get(alpha_2=language_code[:2]).name} ({language_code})" - voice_name = model_config.parameters['voice_name'] - subvoices = [voice.split(':')[0] for voice in model_config.parameters['subvoices'].split(',')] - full_voice_names = [voice_name + "." + subvoice for subvoice in subvoices] - - if language_name in TTS_MODELS: - TTS_MODELS[language_name]['voices'].extend(full_voice_names) - else: - TTS_MODELS[language_name] = {"language_code": language_code, "voices": full_voice_names} - - TTS_MODELS = dict(sorted(TTS_MODELS.items())) - - _LOGGER.info(json.dumps(TTS_MODELS, indent=4)) -except: - TTS_MODELS["No TTS languages available"] = "No TTS languages available" - gr.Info('The app could not find any available TTS languages. Thus, none will appear in the "TTS Language" or "TTS Voice" dropdown menus. Check that you are connected to a Riva server with TTS enabled.') - _LOGGER.info('The app could not find any available TTS languages. Thus, none will appear in the "TTS Language" or "TTS Voice" dropdown menus. Check that you are connected to a Riva server with TTS enabled.') +grpc_auth = None + +def tts_init(auth): + global TTS_MODELS + global grpc_auth + grpc_auth = auth + try: + tts_client = riva.client.SpeechSynthesisService(auth) + config_response = tts_client.stub.GetRivaSynthesisConfig(riva_tts.RivaSynthesisConfigRequest()) + for model_config in config_response.model_config: + language_code = model_config.parameters['language_code'] + language_name = f"{pycountry.languages.get(alpha_2=language_code[:2]).name} ({language_code})" + voice_name = model_config.parameters['voice_name'] + subvoices = [voice.split(':')[0] for voice in model_config.parameters['subvoices'].split(',')] + full_voice_names = [voice_name + "." 
+ subvoice for subvoice in subvoices] + + if language_name in TTS_MODELS: + TTS_MODELS[language_name]['voices'].extend(full_voice_names) + else: + TTS_MODELS[language_name] = {"language_code": language_code, "voices": full_voice_names} + + TTS_MODELS = dict(sorted(TTS_MODELS.items())) + + _LOGGER.info(json.dumps(TTS_MODELS, indent=4)) + except: + TTS_MODELS["No TTS languages available"] = "No TTS languages available" + gr.Info('The app could not find any available TTS languages. Thus, none will appear in the "TTS Language" or "TTS Voice" dropdown menus. Check that you are connected to a Riva server with TTS enabled.') + _LOGGER.info('The app could not find any available TTS languages. Thus, none will appear in the "TTS Language" or "TTS Voice" dropdown menus. Check that you are connected to a Riva server with TTS enabled.') # Once the user selects a TTS language, narrow the options in the TTS voice # dropdown menu accordingly @@ -97,27 +74,20 @@ def update_voice_dropdown(language): ) return voice_dropdown -def text_to_speech(text, language, voice, enable_tts, auth=auth): - if not enable_tts: - return None - if auth == None: - _LOGGER.info('Riva client did not initialize properly. Skipping text to speech.') - return None, None - if language == "No TTS languages available": - gr.Info('The app cannot access TTS services. Any attempt to synthesize audio will be unsuccessful. Check that you are connected to a Riva server with TTS enabled.') - _LOGGER.info('The app cannot access TTS services. Any attempt to synthesize audio will be unsuccessful. Check that you are connected to a Riva server with TTS enabled.') - return None, gr.update(interactive=False) - if not text or not voice or not enable_tts: - gr.Info("Provide all inputs or select an example") - return None, gr.update(interactive=False) - if not text: - gr.Info('No text from which to synthesize a voice has been provided') - return None, gr.update(interactive=False) - if not voice: - gr.Info('No TTS voice or an invalid TTS voice has been selected') - return None, gr.update(interactive=False) - if not enable_tts: - gr.Info('TTS output is currently disabled. Click on the "Enable TTS output" checkbox to enable it.') +def text_to_speech(text, language, voice, enable_tts): + if enable_tts: + if language == "No TTS languages available": + gr.Info('The app cannot access TTS services. Any attempt to synthesize audio will be unsuccessful. Check that you are connected to a Riva server with TTS enabled.') + _LOGGER.info('The app cannot access TTS services. Any attempt to synthesize audio will be unsuccessful. Check that you are connected to a Riva server with TTS enabled.') + return None, gr.update(interactive=False) + if not text: + gr.Info('No text from which to synthesize a voice has been provided') + return None, gr.update(interactive=False) + if not voice: + gr.Info('No TTS voice or an invalid TTS voice has been selected') + return None, gr.update(interactive=False) + else: + _LOGGER.debug('TTS output is currently disabled. Click on the "Enable TTS output" checkbox to enable it.') return None, gr.update(interactive=False) first_buffer = True @@ -128,8 +98,9 @@ def text_to_speech(text, language, voice, enable_tts, auth=auth): # TODO: Audio download does not work with streaming audio output. 
# See https://github.com/gradio-app/gradio/issues/6570 - tts_client = riva.client.SpeechSynthesisService(auth) + tts_client = riva.client.SpeechSynthesisService(grpc_auth) + _LOGGER.info(f"Calling synthesize_online") response = tts_client.synthesize_online( text=text, voice_name=voice, diff --git a/RetrievalAugmentedGeneration/llm-inference-server/model_server/__main__.py b/RetrievalAugmentedGeneration/llm-inference-server/model_server/__main__.py index f34a84a9e..a72a7a005 100644 --- a/RetrievalAugmentedGeneration/llm-inference-server/model_server/__main__.py +++ b/RetrievalAugmentedGeneration/llm-inference-server/model_server/__main__.py @@ -106,7 +106,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--quantization", type=str, - required=False, + default=None, help="Quantization type to be used for LLMs", ) diff --git a/RetrievalAugmentedGeneration/llm-inference-server/model_server/conversion/llama.py b/RetrievalAugmentedGeneration/llm-inference-server/model_server/conversion/llama.py index 03f494299..60b93c667 100644 --- a/RetrievalAugmentedGeneration/llm-inference-server/model_server/conversion/llama.py +++ b/RetrievalAugmentedGeneration/llm-inference-server/model_server/conversion/llama.py @@ -78,27 +78,29 @@ def convert(model: Model, opts: ConversionOptions) -> None: str(opts.vocab_size), ] - if opts.quantization == "int4_awq" and model.format.name == "PYTORCH": - ckpt_dir = find_pt_file(model.model_dir) - raw_args.extend([ - "--use_weight_only", - "--weight_only_precision", - "int4_awq", - "--per_group", - "--quant_ckpt_path", - str(ckpt_dir), - ]) - elif opts.quantization == "None": + if opts.quantization: + if opts.quantization == "int4_awq" and model.format.name == "PYTORCH": + ckpt_dir = find_pt_file(model.model_dir) + raw_args.extend([ + "--use_weight_only", + "--weight_only_precision", + "int4_awq", + "--per_group", + "--quant_ckpt_path", + str(ckpt_dir), + ]) + else: + raise Exception( + "Unsupported quantization or model format, " \ + + f"supported quantizations: {_QUANTIZATIONS}, " \ + + "with format: PYTORCH" + ) + else: raw_args.extend([ _CHECKPOINT_ARGS_FLAGS[model.format.name], model.model_dir, ]) - else: - raise Exception( - "Unsupported quantization or model format, " \ - + f"supported quantizations: {_QUANTIZATIONS}, " \ - + "with format: PYTORCH" - ) + except KeyError as err: raise UnsupportedFormatException( model.format.name, ["PyTorch", "Hugging Face"] diff --git a/RetrievalAugmentedGeneration/requirements.txt b/RetrievalAugmentedGeneration/requirements.txt index 2a9f6214a..20b1b1aa3 100644 --- a/RetrievalAugmentedGeneration/requirements.txt +++ b/RetrievalAugmentedGeneration/requirements.txt @@ -19,3 +19,4 @@ nemollm==0.3.4 opentelemetry-sdk==1.21.0 opentelemetry-api==1.21.0 opentelemetry-exporter-otlp-proto-grpc==1.21.0 +faiss-cpu==1.7.4 diff --git a/deploy/compose/config.yaml b/deploy/compose/config.yaml index 0cac6ae79..ddf996f18 100644 --- a/deploy/compose/config.yaml +++ b/deploy/compose/config.yaml @@ -50,14 +50,21 @@ embeddings: model_name: intfloat/e5-large-v2 # The name embedding search model from huggingface or nv-ai-foundation. # Type: str + # ENV Variable: APP_EMBEDDINGS_MODELNAME dimensions: 1024 # The dimensions of the embedding search model from huggingface. # Type: int model_engine: huggingface - # The backend name hosting the model, huggingface and nv-ai-foundation are supported. + # The backend name hosting the model, huggingface, nv-ai-foundation are supported. 
# Type: str + # ENV Variable: APP_EMBEDDINGS_MODELENGINE + + server_url: "embeddings:9080" + # The server url of nemo embedding microservice + # Type: str + # ENV Variable: APP_EMBEDDINGS_SERVERURL prompts: # The configuration for the prompts used for response generation. diff --git a/deploy/compose/docker-compose-nemotron.yaml b/deploy/compose/docker-compose-nemotron.yaml index ce00a85a9..3dd45d864 100644 --- a/deploy/compose/docker-compose-nemotron.yaml +++ b/deploy/compose/docker-compose-nemotron.yaml @@ -8,7 +8,7 @@ services: dockerfile: Dockerfile volumes: - ${MODEL_DIRECTORY:?please update the env file and source it before running}:/model - command: ${MODEL_ARCHITECTURE:?please update the env file and source it before running} --http --max-input-length ${MODEL_MAX_INPUT_LENGTH:-3000} --max-output-length ${MODEL_MAX_OUTPUT_LENGTH:-512} --quantization ${QUANTIZATION:-None} + command: ${MODEL_ARCHITECTURE:?please update the env file and source it before running} --http --max-input-length ${MODEL_MAX_INPUT_LENGTH:-3000} --max-output-length ${MODEL_MAX_OUTPUT_LENGTH:-512} ${QUANTIZATION:+--quantization $QUANTIZATION} ports: - "8000:8000" - "8001:8001" @@ -25,12 +25,6 @@ services: - driver: nvidia device_ids: ["0", "1"] capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"] - interval: 30s - timeout: 20s - retries: 3 - start_period: 10m jupyter-server: container_name: notebook-server @@ -89,12 +83,12 @@ services: milvus: container_name: milvus-standalone - image: milvusdb/milvus:v2.3.1-gpu + image: milvusdb/milvus:v2.4.0.1-gpu-beta command: ["milvus", "run", "standalone"] environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9010 - KNOWHERE_GPU_MEM_POOL_SIZE: 2048:4096 + KNOWHERE_GPU_MEM_POOL_SIZE: 2048;4096 volumes: - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus healthcheck: @@ -129,11 +123,12 @@ services: environment: APP_VECTORSTORE_URL: "http://milvus:19530" APP_VECTORSTORE_NAME: "milvus" + COLLECTION_NAME: ${RAG_EXAMPLE} + MILVUS_DB: ${RAG_EXAMPLE} APP_LLM_SERVERURL: "llm:8001" APP_LLM_MODELNAME: ensemble APP_LLM_MODELENGINE: triton-trt-llm APP_CONFIG_FILE: ${APP_CONFIG_FILE} - NVAPI_KEY: ${AI_PLAYGROUND_API_KEY} OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false diff --git a/deploy/compose/docker-compose-nv-ai-foundation.yaml b/deploy/compose/docker-compose-nv-ai-foundation.yaml index 1ec0e74f7..ba9ae38be 100644 --- a/deploy/compose/docker-compose-nv-ai-foundation.yaml +++ b/deploy/compose/docker-compose-nv-ai-foundation.yaml @@ -20,6 +20,8 @@ services: APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." 
NVIDIA_API_KEY: ${NVIDIA_API_KEY} APP_CONFIG_FILE: ${APP_CONFIG_FILE} + APP_VECTORSTORE_NAME: "faiss" + COLLECTION_NAME: ${RAG_EXAMPLE} volumes: - ${APP_CONFIG_FILE}:${APP_CONFIG_FILE} ports: diff --git a/deploy/compose/docker-compose-pgvector.yaml b/deploy/compose/docker-compose-pgvector.yaml index 459d1535b..ba18c6a36 100644 --- a/deploy/compose/docker-compose-pgvector.yaml +++ b/deploy/compose/docker-compose-pgvector.yaml @@ -8,7 +8,7 @@ services: dockerfile: Dockerfile volumes: - ${MODEL_DIRECTORY:?please update the env file and source it before running}:/model - command: ${MODEL_ARCHITECTURE:?please update the env file and source it before running} --max-input-length ${MODEL_MAX_INPUT_LENGTH:-3000} --max-output-length ${MODEL_MAX_OUTPUT_LENGTH:-512} --quantization ${QUANTIZATION:-None} + command: ${MODEL_ARCHITECTURE:?please update the env file and source it before running} --max-input-length ${MODEL_MAX_INPUT_LENGTH:-3000} --max-output-length ${MODEL_MAX_OUTPUT_LENGTH:-512} ${QUANTIZATION:+--quantization $QUANTIZATION} ports: - "8000:8000" - "8001:8001" @@ -25,12 +25,6 @@ services: - driver: nvidia count: ${INFERENCE_GPU_COUNT:-all} capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"] - interval: 30s - timeout: 20s - retries: 3 - start_period: 10m pgvector: container_name: pgvector @@ -62,10 +56,10 @@ services: APP_LLM_MODELNAME: "ensemble" APP_LLM_MODELENGINE: "triton-trt-llm" APP_CONFIG_FILE: ${APP_CONFIG_FILE} - NVAPI_KEY: ${AI_PLAYGROUND_API_KEY} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password} POSTGRES_USER: ${POSTGRES_USER:-postgres} POSTGRES_DB: ${POSTGRES_DB:-api} + COLLECTION_NAME: ${RAG_EXAMPLE} volumes: - ${APP_CONFIG_FILE}:${APP_CONFIG_FILE} ports: diff --git a/deploy/compose/docker-compose.yaml b/deploy/compose/docker-compose.yaml index ac60a34b2..b3698a046 100644 --- a/deploy/compose/docker-compose.yaml +++ b/deploy/compose/docker-compose.yaml @@ -8,7 +8,7 @@ services: dockerfile: Dockerfile volumes: - ${MODEL_DIRECTORY:?please update the env file and source it before running}:/model - command: ${MODEL_ARCHITECTURE:?please update the env file and source it before running} --max-input-length ${MODEL_MAX_INPUT_LENGTH:-3000} --max-output-length ${MODEL_MAX_OUTPUT_LENGTH:-512} --quantization ${QUANTIZATION:-None} + command: ${MODEL_ARCHITECTURE:?please update the env file and source it before running} --max-input-length ${MODEL_MAX_INPUT_LENGTH:-3000} --max-output-length ${MODEL_MAX_OUTPUT_LENGTH:-512} ${QUANTIZATION:+--quantization $QUANTIZATION} ports: - "8000:8000" - "8001:8001" @@ -25,19 +25,13 @@ services: - driver: nvidia count: ${INFERENCE_GPU_COUNT:-all} capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"] - interval: 30s - timeout: 20s - retries: 3 - start_period: 10m jupyter-server: container_name: notebook-server image: notebook-server:latest build: context: ../../ - dockerfile: ./notebooks/Dockerfile.notebooks + dockerfile: ./notebooks/Dockerfile.notebooks # replace GPU enabled Dockerfile ./notebooks/Dockerfile.gpu_notebook ports: - "8888:8888" expose: @@ -49,8 +43,6 @@ services: - driver: nvidia count: 1 capabilities: [gpu] - depends_on: - - "llm" etcd: container_name: milvus-etcd @@ -89,12 +81,12 @@ services: milvus: container_name: milvus-standalone - image: milvusdb/milvus:v2.3.1-gpu + image: milvusdb/milvus:v2.4.0.1-gpu-beta command: ["milvus", "run", "standalone"] environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9010 - 
KNOWHERE_GPU_MEM_POOL_SIZE: 2048:4096 + KNOWHERE_GPU_MEM_POOL_SIZE: 2048;4096 volumes: - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus healthcheck: @@ -129,11 +121,11 @@ services: environment: APP_VECTORSTORE_URL: "http://milvus:19530" APP_VECTORSTORE_NAME: "milvus" + COLLECTION_NAME: ${RAG_EXAMPLE} APP_LLM_SERVERURL: "llm:8001" APP_LLM_MODELNAME: ensemble APP_LLM_MODELENGINE: triton-trt-llm APP_CONFIG_FILE: ${APP_CONFIG_FILE} - NVAPI_KEY: ${AI_PLAYGROUND_API_KEY} OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false diff --git a/deploy/k8s-operator/kube-trailblazer/helm-charts/staging/rag-llm-pipeline/templates/milvus-standalone.yaml b/deploy/k8s-operator/kube-trailblazer/helm-charts/staging/rag-llm-pipeline/templates/milvus-standalone.yaml index 1b9ab8473..a52bd916a 100644 --- a/deploy/k8s-operator/kube-trailblazer/helm-charts/staging/rag-llm-pipeline/templates/milvus-standalone.yaml +++ b/deploy/k8s-operator/kube-trailblazer/helm-charts/staging/rag-llm-pipeline/templates/milvus-standalone.yaml @@ -27,7 +27,7 @@ spec: - name: ETCD_ENDPOINTS value: milvus-etcd:2379 - name: KNOWHERE_GPU_MEM_POOL_SIZE - value: 2048:4096 + value: 2048;4096 - name: MINIO_ADDRESS value: milvus-minio:9010 ports: diff --git a/docs/rag/jupyter_server.md b/docs/rag/jupyter_server.md index 82130ba53..85cdc1684 100644 --- a/docs/rag/jupyter_server.md +++ b/docs/rag/jupyter_server.md @@ -23,8 +23,46 @@ This notebook demonstrates how to use LlamaIndex to build a more complex retriev This notebook demonstrates how to use the REST FastAPI server to upload the knowledge base and then ask a question without and with the knowledge base. +6. [**NVIDIA AI Endpoint Integration with LangChain**](../../notebooks/07_Option(1)_NVIDIA_AI_endpoint_simple.ipynb) +This notebook demonstrates how to build a Retrieval Augmented Generation (RAG) example using the NVIDIA AI endpoint integrated with LangChain, with FAISS as the vector store. + +7. [**RAG with LangChain and a local LLM model from Hugging Face**](../../notebooks/07_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb) +This notebook demonstrates how to plug in a local LLM from HuggingFace Hub and build a simple RAG app using LangChain. + +8. [**NVIDIA AI Endpoint with LlamaIndex and LangChain**](../../notebooks/08_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb) +This notebook demonstrates how to plug in an NVIDIA AI Endpoint model, mixtral_8x7b, and the nvolveqa_40k embedding, and bind these into LlamaIndex with these customizations. + +9. [**Locally deployed model from Hugging Face integration with LlamaIndex and LangChain**](../../notebooks/08_Option(2)_llama_index_with_HF_local_LLM.ipynb) +This notebook demonstrates how to plug in a local LLM from HuggingFace Hub (Llama-2-13b-chat-hf) and the all-MiniLM-L6-v2 embedding from Hugging Face, and bind these into LlamaIndex with these customizations. + +10. [**LangChain agent with tools that plug in multiple models from NVIDIA AI Endpoints**](../../notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb) +This notebook demonstrates how to use multiple NVIDIA AI endpoint models such as `mixtral_8x7b`, `Deplot` and `Neva`. + # Running the notebooks -If a JupyterLab server needs to be compiled and stood up manually for development purposes, run the following commands: +If a JupyterLab server needs to be compiled and stood up manually for development purposes, use the following commands: + +- [Optional] Notebooks `7 to 9` require GPUs.
If you have a GPU and are trying out notebooks `7-9` update the jupyter-server service in the [docker-compose.yaml](../../deploy/compose/docker-compose.yaml) file to use `./notebooks/Dockerfile.gpu_notebook` as the Dockerfile +``` + jupyter-server: + container_name: notebook-server + image: notebook-server:latest + build: + context: ../../ + dockerfile: ./notebooks/Dockerfile.gpu_notebook +``` + +- [Optional] Notebook from `7-9` may need multiple GPUs. Update [docker-compose.yaml](../../deploy/compose/docker-compose.yaml) to use multiple gpu ids in `device_ids` field below or set `count: all` +``` + jupyter-server: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0', '1'] + capabilities: [gpu] +``` + - Build the container ``` source deploy/compose/compose.env diff --git a/experimental/README.md b/experimental/README.md new file mode 100644 index 000000000..53472cae3 --- /dev/null +++ b/experimental/README.md @@ -0,0 +1,37 @@ + + +# Experimental Examples + +## What is an Experimental Example? + +Experimental examples are sample code and deployments for RAG pipelines that are not tested by NVIDIA personnel. + +## Inventory + +* [NVIDIA RAG Streaming Document Ingestion Pipeline](./streaming_ingest_rag) + + This example demonstrate the construction of a performance-oriented pipeline that accepts a stream of heterogenous documents, divides the documents into smaller segments or chunks, computes the embedding vector for each of these chunks, and uploads the text chunks along with their associated embeddings to a Vector Database. This pipeline builds on the [Morpheus SDK](https://docs.nvidia.com/morpheus/index.html) to take advantage of end-to-end asynchronous processing. This pipeline showcases pipeline parallelism (including CPU and GPU-accelerated nodes), as well as, a mechanism to horizontally scale out data ingestion workers. + +* [NVIDIA Multimodal RAG Assistant](./multimodal_assistant) + + This example is able to ingest PDFs, PowerPoint slides, Word and other documents with complex data formats including text, images, slides and tables. It allows users to ask questions through a text interface and optionally with an image query, and it can respond with text and reference images, slides and tables in its response, along with source links and downloads. + +* [Run RAG-LLM in Azure Machine Learning](./AzureML) + + This example shows the configuration changes to using Docker containers and local GPUs that are required + to run the RAG-LLM pipelines in Azure Machine Learning. diff --git a/experimental/multimodal_assistant/retriever/vector.py b/experimental/multimodal_assistant/retriever/vector.py index 55469c6ed..925baf3b0 100644 --- a/experimental/multimodal_assistant/retriever/vector.py +++ b/experimental/multimodal_assistant/retriever/vector.py @@ -31,10 +31,10 @@ class VectorClient(ABC, BaseModel): @abstractmethod def connect(self): ... - + def disconnect(self): ... - + @abstractmethod def search(self, query_vectors, limit=5): ... 
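The abstract `VectorClient` interface above boils down to `connect` plus `search` over query vectors. As an illustration of that contract, here is a minimal in-memory sketch built on `faiss` (added to the requirements in this patch as `faiss-cpu`); the class name and fields are hypothetical and not part of the repository.

```python
import faiss
import numpy as np

class FaissVectorClient:
    """Hypothetical in-memory analogue of the VectorClient contract shown above."""

    def __init__(self, embedding_size: int = 384):
        # IndexFlatL2 performs exact L2 search, the same metric the Milvus client in this module defaults to.
        self.index = faiss.IndexFlatL2(embedding_size)

    def connect(self):
        # Nothing to connect to for an in-process index; kept only for interface parity.
        return self.index

    def add(self, vectors):
        # FAISS expects float32 arrays of shape (n, embedding_size).
        self.index.add(np.asarray(vectors, dtype=np.float32))

    def search(self, query_vectors, limit=5):
        distances, ids = self.index.search(np.asarray(query_vectors, dtype=np.float32), limit)
        return distances, ids

# Usage sketch: index 100 random 384-dimensional vectors and fetch the 5 nearest neighbours.
client = FaissVectorClient()
client.add(np.random.rand(100, 384))
print(client.search(np.random.rand(1, 384), limit=5))
```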
@@ -45,7 +45,7 @@ def update(self): class MilvusVectorClient(VectorClient): - + hostname : str = "localhost" port : str = "19530" metric_type : str = "L2" @@ -62,14 +62,14 @@ def __init__(self, *args, **kwargs): self.vector_db.load() def _create_index(self, metric_type, index_type, field_name, nlist=100): - + index_params = { "metric_type": metric_type, # or "IP" depending on your requirement "index_type": index_type, # You can choose other types like IVF_PQ based on your need "params": {"nlist": nlist} # Adjust the nlist parameter as per your requirements } self.vector_db.create_index(field_name=field_name, index_params=index_params) - + def connect(self, collection_name, hostname, port, alias="default"): connections.connect(alias, host=hostname, port=port) try: @@ -85,7 +85,7 @@ def disconnect(self, alias="default"): def search(self, query_vectors, limit=5): search_params = { - "metric_type": self.metric_type, + "metric_type": self.metric_type, "params": {"nprobe": self.nprobe} } @@ -147,7 +147,7 @@ def get_schema(self, embedding_size): def create_collection(self, collection_name, embedding_size): if utility.has_collection(collection_name): utility.drop_collection(collection_name) - + schema = self.get_schema(embedding_size) self.vector_db = Collection(name=collection_name, schema=schema) @@ -160,13 +160,13 @@ class QdrantVectorClient(VectorClient): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.vector_db = self.connect(self.hostname, self.port) - + def connect(self, hostname, port): client = QdrantClient(host=hostname, port=port) return client - + def search(self, query_vectors, limit=5): - + search_results = self.vector_db.search( collection_name=self.collection_name, query_vector=query_vectors, diff --git a/experimental/streaming_ingest_rag/.env b/experimental/streaming_ingest_rag/.env new file mode 100644 index 000000000..22ba42fbc --- /dev/null +++ b/experimental/streaming_ingest_rag/.env @@ -0,0 +1,16 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
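The `create_collection` method above drops any existing collection and rebuilds it from `get_schema`. A standalone `pymilvus` sketch of that drop-and-recreate flow is shown below; the field names, dimensions, and index parameters are illustrative assumptions, not the module's actual schema.

```python
from pymilvus import (
    Collection, CollectionSchema, DataType, FieldSchema, connections, utility,
)

def recreate_collection(collection_name: str, embedding_size: int) -> Collection:
    """Drop-and-recreate flow, loosely mirroring MilvusVectorClient.create_collection (schema is assumed)."""
    connections.connect(host="localhost", port="19530")

    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    schema = CollectionSchema(
        fields=[
            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
            FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=embedding_size),
            FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535),
        ],
        description="Illustrative document store schema",
    )
    collection = Collection(name=collection_name, schema=schema)

    # An index is required before the collection can be loaded and searched;
    # IVF_FLAT with an L2 metric matches the defaults used by the client above.
    collection.create_index(
        field_name="embedding",
        index_params={"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 100}},
    )
    collection.load()
    return collection
```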
+ +### Specify Docker Volume Mounts ### +export MILVUS_VOLUME_DIRECTORY=./docker diff --git a/experimental/streaming_ingest_rag/.gitignore b/experimental/streaming_ingest_rag/.gitignore new file mode 100644 index 000000000..d48e2d82c --- /dev/null +++ b/experimental/streaming_ingest_rag/.gitignore @@ -0,0 +1,38 @@ +# Python Exclusions +.venv +**__pycache__** + +# Helm Exclusions +**/charts/*.tgz + +# project temp files +deploy/*.log +deploy/*.txt + +# Docker Compose exclusions +volumes/ +uploaded_files/ + +# Visual Studio Code +.vscode + +# Node modules +**/node_modules + +# Jupyter Notebook +.ipynb_checkpoints + +# ignore container volumes +**/docker/volumes + +# ignore external repos +**/docker/morpheus +**/docker/attu + +# misc +.docker +.gitconfig + +# ignore misc directories +**/.data +**/.depr diff --git a/experimental/streaming_ingest_rag/README.md b/experimental/streaming_ingest_rag/README.md new file mode 100644 index 000000000..414ee4f5b --- /dev/null +++ b/experimental/streaming_ingest_rag/README.md @@ -0,0 +1,586 @@ + + +# RAG Example: Streaming Data Ingest + +## Table of Contents + +1. [Background Information](#Background-Information) + - [Purpose](#Purpose) + - [Source Documents](#Source-Documents) + - [Embedding Model](#Embedding-Model) + - [Vector Database Service](#Vector-Database-Service) +2. [Implementation and Design Decisions](#Implementation-and-Design-Decisions) +3. [Getting Started](#Getting-Started) + - [Prerequisites](#Prerequisites) + - [Build the Containers](#Build-the-Containers) + - [Start the Streaming Ingest Cluster](#Start-the-Streaming-Ingest-Cluster) + - [Performance Instrumentation](#Performance-Instrumentation) + - [Pipeline Customization](#Pipeline-Customization) + - [Cluster Management](#Cluster-Management) +4. [Configuration Settings](#Configuration-Settings) +5. [Additional Options](#Additional-Options) + - [Exporting and Deploying a Different Model from Huggingface](#Exporting-and-Deploying-a-Different-Model-from-Huggingface) + +## Background Information + +### Purpose + +The primary objective of this example is to demonstrate the construction of a performance-oriented pipeline that performs the following tasks: + +- Accepts a stream of heterogeneous documents. +- Divides the documents into smaller segments or chunks. +- Computes the embedding vector for each of these chunks. +- Uploads the text chunks along with their associated embeddings to a Vector Database (VDB). + +This pipeline builds on the [Morpheus SDK](https://docs.nvidia.com/morpheus/index.html) to take advantage of end-to-end asynchronous processing. This pipeline showcases pipeline parallelism (including CPU and GPU-accelerated nodes), as well as a mechanism to horizontally scale out data ingestion workers. + +### Source Documents + +- The pipeline is designed to process text-based input from various document types. Possible use cases could + include structured documents like PDFs, dynamic sources such as web pages, and image-based documents through future + Optical Character Recognition (OCR) integration. + +- For this demonstration, the source documents are obtained from raw text published to Kafka, URLs to be scraped from Kafka, a static list of RSS feeds combined with a web scraper, and sample PDF documents. The rationale + behind this selection includes: + - Emulating practical scenarios: Cybersecurity RSS feeds can serve as the foundation for a comprehensive + knowledge database, such as for a security chatbot.
+  - Minimizing external dependencies: Relying on RSS feeds and web scraping avoids the need for specialized datasets
+    or API keys.
+  - Representing heterogeneous data: Enterprises may have static and streaming data sources that flow through this data pipeline.
+
+### Embedding Model
+
+- The pipeline can accommodate various embedding models that transform text into vectors of floating-point numbers.
+  Several models from Huggingface, such as `paraphrase-multilingual-mpnet-base-v2`, `e5-large-v2`,
+  and `all-mpnet-base-v2`, have been evaluated for compatibility. These models are not stored in this repository, but are downloaded from community sources at build time.
+
+- For the purposes of this demonstration, the model `all-MiniLM-L6-v2` will be employed for its efficiency and compactness, characterized by a smaller embedding dimension.
+
+### Vector Database Service
+
+- The architecture is agnostic to the choice of Vector Database (VDB) for storing embeddings and their metadata. While
+  the present implementation employs Milvus due to its GPU-accelerated indices, the design supports easy integration
+  with other databases like Chroma or FAISS, should the need arise.
+
+## Implementation and Design Decisions
+
+### Implementation Details
+
+[Original GitHub issue](https://github.com/nv-morpheus/Morpheus/issues/1298)
+
+The pipeline is composed of three primary components:
+
+1. **Document Source Handler**: This component is responsible for acquiring and preprocessing the text data. Given that
+   we are using RSS feeds and a web scraper in this example, the handler's function is to fetch the latest updates from
+   the feeds, perform preliminary data cleaning, and standardize the format for subsequent steps.
+
+2. **Embedding Generator**: This is the heart of the pipeline, which takes the preprocessed text chunks and computes
+   their embeddings. Leveraging the model `all-MiniLM-L6-v2` from Huggingface, the text data is transformed into
+   embeddings with a dimension of 384.
+
+3. **Vector Database Uploader**: Post embedding generation, this module takes the embeddings alongside their associated
+   metadata and pushes them to a Vector Database (VDB). For our implementation, [Milvus](https://milvus.io/), a GPU-accelerated vector
+   database, has been chosen.
+
+### Rationale Behind Design Decisions
+
+The selection of specific components and models was influenced by several factors:
+
+- **Document Source Choice**: RSS feeds and web scraping offer a dynamic and continuously updating source of data. For
+  the use case of building a cybersecurity knowledge repository, real-time information fetching is a reasonable choice.
+
+- **Model Selection for Embeddings**: `all-MiniLM-L6-v2` was chosen due to its efficiency in generating embeddings. Its
+  smaller dimension ensures quick computations without compromising the quality of embeddings.
+
+- **Vector Database**: For the purposes of this pipeline, [Milvus](https://milvus.io/) was chosen due to its popularity, ease of use, and
+  availability.
+
+## Getting Started
+
+### Prerequisites
+
+Before running the pipeline, ensure the following prerequisites are met:
+
+- Operating System: Ubuntu 22.04
+- Volta architecture GPU or better
+- [NVIDIA driver 520.61.05 or higher](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html)
+- [Docker](https://docs.docker.com/desktop/install/linux-install/)
+- [Docker Compose](https://docs.docker.com/compose/install/standalone/) - version 1.28.0 or higher, preferably v2.
+  If you encounter an error similar to:
+
+  ```none
+  ERROR: The Compose file './docker-compose.yml' is invalid because:
+  services.jupyter.deploy.resources.reservations value Additional properties are not allowed ('devices' was
+  unexpected)
+  ```
+
+  this is most likely due to using an older version of the docker-compose command; re-run the build with docker compose instead. Refer to Migrate to Compose V2 for more information.
+
+- [The NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+- [Git LFS](https://git-lfs.com/)
+
+### Build the Containers
+
+This step can take a while, as some containers are built from source.
+
+#### Option 1: Build All Convenience Script
+
+Step 1: Run the all-container build convenience script
+
+This is useful the first time you build out the infrastructure.
+
+```bash
+./docker/build_all.sh
+```
+
+#### Option 2: Build Components Individually
+
+Step 1: Bring in [Morpheus SDK](https://docs.nvidia.com/morpheus/index.html) dependencies
+
+```bash
+./docker/build_morpheus.sh
+```
+
+Step 2: Bring in [Attu](https://milvus.io/docs/v2.1.x/attu_collection.md) dependencies
+
+```bash
+./docker/build_attu.sh
+```
+
+Step 3: Build and pull containers
+
+```bash
+docker-compose build attu streaming-ingest-dev ingest-worker producer
+```
+
+```bash
+docker-compose pull etcd minio standalone zookeeper kafka init-kafka triton
+```
+
+### Start the Streaming Ingest Cluster
+
+Step 1: Start containers
+
+```bash
+docker-compose up -d
+```
+
+Step 2: Stream some data into the Kafka cluster
+
+Output from the help utility:
+
+```bash
+./utils/produce_messages.sh: option requires an argument -- h
+
+usage: ./utils/produce_messages.sh [-s SOURCE_TYPE] [-n N_MESSAGES]
+options:
+  -h                Show this help message and exit.
+  -s SOURCE_TYPE    Source type to generate (url, raw, or both)
+  -n N_MESSAGES     Number of messages to publish to Kafka. (Default value: 1000)
+```
+
+Example usage, streaming 1000 URL and raw data examples into Kafka:
+
+```bash
+./utils/produce_messages.sh -s both -n 1000
+```
+
+Step 3: Log in to [Attu](https://milvus.io/docs/v2.1.x/attu_collection.md) for [Milvus](https://milvus.io/) administration and interaction with stored vectors:
+
+`localhost:3000`
+
+When logging in to [Attu](https://milvus.io/docs/v2.1.x/attu_collection.md), paste the URL below as the "Milvus Address":
+
+`http://milvus:19530`
+
+### Performance Instrumentation
+
+Step 1: View docker logs to inspect the performance of each `ingest-worker`
+
+```bash
+docker logs streaming_ingest_rag_ingest-worker_1 -f
+```
+
+Note - In this example, we are leveraging [Triton Inference Server's](https://developer.nvidia.com/triton-inference-server) support for [ONNX with TensorRT Optimization](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/optimization.md#onnx-with-tensorrt-optimization-ort-trt). As a result, the first few inferences will be slow while the ONNX embedding model is converted to a [TensorRT](https://developer.nvidia.com/tensorrt) engine.
+
+### Pipeline Customization
+
+This example builds on the Morpheus SDK to implement the streaming ingest pipeline. Customizations can be made within the `streaming-ingest-dev` container defined in the `docker-compose.yml` file.
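+
+If you prefer a shell over the notebook interface, a minimal sketch of this workflow (assuming the `container_name` values from `docker-compose.yml` and the configuration path referenced below) is to exec into the development container and inspect or edit the pipeline configuration directly:
+
+```bash
+# Open a shell inside the development container
+docker exec -it streaming-ingest-dev bash
+
+# Inspect the default pipeline configuration referenced below
+cat /workspace/examples/llm/vdb_upload/kafka_config.yaml
+```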
+
+A Jupyter Notebook, including [NVdashboard](https://github.com/rapidsai/jupyterlab-nvdashboard) extensions to monitor resource utilization, is made available at:
+
+`http://localhost:8888`
+
+To start, consider tuning values in one of the YAML files below:
+
+`/workspace/examples/llm/vdb_upload/kafka_config.yaml` (the default example)
+
+or
+
+`/workspace/examples/llm/vdb_upload/vdb_config` (for additional heterogeneous ingest workflows)
+
+For more advanced customizations, the following references describe how to build custom Morpheus [Modules](https://docs.nvidia.com/morpheus/developer_guide/guides/7_python_modules.html) and [Stages](https://docs.nvidia.com/morpheus/developer_guide/architecture.html#stage-details) to further extend these examples to custom data types, transformations, etc.
+
+- [Developer Guide](https://docs.nvidia.com/morpheus/developer_guide/guides/index.html)
+- [Developer Guide Source Code](https://github.com/nv-morpheus/Morpheus/tree/branch-24.03/examples/developer_guide)
+
+### Cluster Management
+
+Step 1: (Optional) Scale up streaming ingest workers to increase hardware saturation and boost throughput
+
+```bash
+docker-compose up --scale ingest-worker=3 -d
+```
+
+Step 2: (Optional) Stop or tear down all (or named) running containers. Note that Triton will need to recompile TRT engines after this step.
+
+```bash
+docker-compose stop
+```
+
+```bash
+docker-compose down
+```
+
+## Configuration Settings
+
+The configuration for this streaming ingest pipeline is exposed by a YAML file at the following location:
+
+`./morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml`
+
+Users are empowered to tune configuration settings (e.g., new Kafka topics for additional experiments). When
+configuring the Morpheus Pipeline, especially for stages like the RSS source and the Vector Database Upload, it's
+important to balance responsiveness and performance.
+
+- **Kafka Source Stage**: The Kafka source stage is responsible for subscribing to a Kafka topic and yielding payloads or links
+  for processing. In the pure web scraping example, a larger batch size can lead to decreased responsiveness, as the subsequent
+  web scraping stage may take considerable time to retrieve and process all messages in the same batch. It is suggested
+  to configure this stage with a smaller batch size, as this change has minimal impact on overall performance while improving
+  responsiveness. A quick way to check whether the configured batch sizes are keeping up with the incoming stream is shown after this list.
+
+- **RSS Source Stage**: The RSS source stage is responsible for yielding webpage links for processing. A larger batch size
+  at this stage can lead to decreased responsiveness, as the subsequent web scraper stage may take a considerable amount of
+  time to retrieve and process all the items in each batch. To ensure a responsive experience for users, it's recommended
+  to configure the RSS source stage with a relatively smaller batch size. This adjustment tends to have minimal impact on
+  overall performance while significantly improving the time to process each batch of links.
+
+- **Vector Database Upload Stage**: At the other end of the pipeline, the Vector Database Upload stage has its own
+  considerations. This stage experiences a significant transaction overhead. To mitigate this, it is advisable to configure
+  this stage with the largest batch size possible. This approach helps in efficiently managing transaction overheads and
+  improves the throughput of the pipeline, especially when dealing with large volumes of data.
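+
+To check whether the configured batch sizes are keeping up with the incoming stream, you can inspect consumer lag for the consumer group used in this example (a minimal sketch, assuming the `kafka` container name from `docker-compose.yml` and the `kafka:19092` bootstrap server and `morpheus` group id from `kafka_config.yaml`):
+
+```bash
+# Describe the consumer group to see current offsets and lag per topic/partition
+docker exec kafka kafka-consumer-groups \
+  --bootstrap-server kafka:19092 \
+  --describe --group morpheus
+```
+
+Steadily growing lag on `scrape_queue` or `raw_queue` suggests the source batch sizes, or the number of `ingest-worker` replicas, need adjusting.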
+ +Balancing these configurations ensures that the pipeline runs efficiently, with optimized responsiveness at the RSS +source stage and improved throughput at the Vector Database Upload stage. + +### YAML Configuration Examples + +*Example: Defining sources via a config file* +Note: see `vdb_config.yaml` for a full configuration example. + +`vdb_config.yaml` + +```yaml +vdb_pipeline: + sources: + - type: filesystem + name: "demo_filesystem_source" + config: + batch_size: 1024 + enable_monitor: False + extractor_config: + chunk_size: 512 + chunk_overlap: 50 + num_threads: 10 # Number of threads to use for file reads + filenames: + - "/path/to/data/*" + watch: false +``` + +*Example: Defining a custom source via a config file* +Note: See `vdb_config.yaml` for a full configuration example. +Note: This example uses the same module and config as the filesystem source example above, but explicitly specifies the +module to load + +`vdb_config.yaml` + +```yaml +vdb_pipeline: + sources: + - type: custom + name: "demo_custom_filesystem_source" + module_id: "file_source_pipe" # Required for custom source, defines the source module to load + module_output_id: "output" # Required for custom source, defines the output of the module to use + namespace: "morpheus_examples_llm" # Required for custom source, defines the namespace of the module to load + config: + batch_size: 1024 + extractor_config: + chunk_size: 512 + num_threads: 10 # Number of threads to use for file reads + config_name_mapping: "file_source_config" + filenames: + - "/path/to/data/*" + watch: false +``` + +```bash +python examples/llm/main.py vdb_upload pipeline \ + --vdb_config_path "./vdb_config.yaml" +``` + +### Morpheus Pipeline Configuration Schema + +The Morpheus Pipeline configuration allows for detailed specification of various pipeline stages, including source +definitions (like RSS feeds and filesystem paths), embedding configurations, and vector database settings. + +### Sources Configuration + +The `sources` section allows you to define multiple data sources of different types: RSS, filesystem, and custom. + +#### Embeddings Configuration + +- **isolate_embeddings**: Boolean to isolate embeddings. +- **model_kwargs**: + - **force_convert_inputs**: Boolean to force the conversion of inputs. + - **model_name**: Name of the model, e.g., `"all-MiniLM-L6-v2"`. + - **server_url**: URL of the server, e.g., `"triton:8001"`. + - **use_shared_memory**: Boolean to use shared memory. + +#### Pipeline Configuration + +- **edge_buffer_size**: Size of the edge buffer, e.g., `128`. +- **feature_length**: Length of the features, e.g., `512`. +- **max_batch_size**: Maximum size of the batch, e.g., `256`. +- **num_threads**: Number of threads, e.g., `10`. +- **pipeline_batch_size**: Size of the batch for the pipeline, e.g., `1024`. + +#### Kafka Source Configuration - Web Scraper + +- **type**: `'kafka'` +- **name**: Name of the Kafka source. +- **config**: + - **stage_config**: + - **enable_monitor**: Boolean to enable monitoring. + - **namespace**: Name of namespace of stage modules. + - **module_id**: Name of source module. + - **module_output_id**: Name of output port of source module. + - **transform_type**: Name of module to transform data. + - **deserialize_config**: + - output_batch_size: Number of elements per batch emitted from source stage. + - **kafka_config**: + - **max_batch_size**: Number of kafka messages per batch emitted from kafka source module. + - **bootstrap_servers**: URL to a Kafka broker that can serve data. 
+    - **input_topic**: Name of topic containing messages to process.
+    - **group_id**: Consumer group this worker/stage will belong to.
+    - **poll_interval**: How often to poll Kafka for new data (pandas format).
+    - **disable_commit**: Boolean to control possible arrival of duplicate messages.
+    - **disable_pre_filtering**: Boolean controlling whether to skip committing messages as they are pulled off the server.
+    - **auto_offset_reset**: Decision to consume from the beginning of a topic partition or only new messages.
+    - **stop_after**: Number of records before stopping ingestion of new messages.
+    - **async_commits**: Boolean to decide whether to asynchronously acknowledge consumed Kafka messages.
+  - **web_scraper_config**:
+    - **chunk_overlap**: Overlap size for chunks.
+    - **chunk_size**: Size of content chunks for processing.
+    - **enable_cache**: Boolean to enable caching.
+    - **cache_path**: Path to sqlite database for caching.
+    - **cache_dir**: Directory containing the sqlite database used for caching.
+    - **link_column**: Column containing the URL to be scraped.
+  - **vdb_config**:
+    - **vdb_resource_name**: Name of collection in VectorDB.
+
+#### Kafka Source Configuration - Raw Text
+
+- **type**: `'kafka'`
+- **name**: Name of the Kafka source.
+- **config**:
+  - **stage_config**:
+    - **enable_monitor**: Boolean to enable monitoring.
+    - **namespace**: Name of namespace of stage modules.
+    - **module_id**: Name of source module.
+    - **module_output_id**: Name of output port of source module.
+    - **transform_type**: Name of module to transform data.
+  - **deserialize_config**:
+    - output_batch_size: Number of elements per batch emitted from source stage.
+  - **kafka_config**:
+    - **max_batch_size**: Number of kafka messages per batch emitted from kafka source module.
+    - **bootstrap_servers**: URL to a Kafka broker that can serve data.
+    - **input_topic**: Name of topic containing messages to process.
+    - **group_id**: Consumer group this worker/stage will belong to.
+    - **poll_interval**: How often to poll Kafka for new data (pandas format).
+    - **disable_commit**: Boolean to control possible arrival of duplicate messages.
+    - **disable_pre_filtering**: Boolean controlling whether to skip committing messages as they are pulled off the server.
+    - **auto_offset_reset**: Decision to consume from the beginning of a topic partition or only new messages.
+    - **stop_after**: Number of records before stopping ingestion of new messages.
+    - **async_commits**: Boolean to decide whether to asynchronously acknowledge consumed Kafka messages.
+  - **raw_chunker_config**:
+    - **chunk_overlap**: Overlap size for chunks.
+    - **chunk_size**: Size of content chunks for processing.
+    - **payload_column**: Column containing text to be processed.
+  - **vdb_config**:
+    - **vdb_resource_name**: Name of collection in VectorDB.
+
+#### RSS Source Configuration
+
+- **type**: `'rss'`
+- **name**: Name of the RSS source.
+- **config**:
+  - **batch_size**: Number of RSS feeds to process at a time.
+  - **cache_dir**: Directory for caching.
+  - **cooldown_interval_sec**: Cooldown interval in seconds.
+  - **enable_cache**: Boolean to enable caching.
+  - **enable_monitor**: Boolean to enable monitoring.
+  - **feed_input**: List of RSS feed URLs.
+  - **interval_sec**: Interval in seconds for fetching new feed items.
+  - **request_timeout_sec**: Timeout in seconds for RSS feed requests.
+  - **run_indefinitely**: Boolean to indicate continuous running.
+  - **stop_after**: Stop after emitting a specific number of records.
+  - **web_scraper_config**:
+    - **chunk_overlap**: Overlap size for chunks.
+    - **chunk_size**: Size of content chunks for processing.
+    - **enable_cache**: Boolean to enable caching.
+
+#### Filesystem Source Configuration
+
+- **type**: `'filesystem'`
+- **name**: Name of the filesystem source.
+- **config**:
+  - **batch_size**: Number of files to process at a time.
+  - **chunk_overlap**: Overlap size for chunks.
+  - **chunk_size**: Size of chunks for processing.
+  - **converters_meta**: Metadata for converters.
+    - **csv**:
+      - **chunk_size**: Chunk size for CSV processing.
+      - **text_column_names**: Column names to be used as text.
+        - **column_name_0**: Column name 0.
+        - **column_name_1**: Column name 1.
+  - **enable_monitor**: Boolean to enable monitoring.
+  - **extractor_config**:
+    - **chunk_size**: Size of chunks for the extractor.
+    - **num_threads**: Number of threads for file reads.
+  - **filenames**: List of file paths to be processed.
+  - **watch**: Boolean to watch for file changes.
+
+#### Custom Source Configuration
+
+- **type**: `'custom'`
+- **name**: Name of the custom source.
+- **config**:
+  - **config_name_mapping**: Mapping name for file source config.
+  - **module_id**: Identifier of the module to use.
+  - **module_output_id**: Output identifier of the module.
+  - **namespace**: Namespace of the module.
+  - **other_config_parameter_1**: Other config parameter 1.
+  - **other_config_parameter_2**: Other config parameter 2.
+
+#### Tokenizer Configuration
+
+- **model_kwargs**:
+  - **add_special_tokens**: Boolean to add special tokens.
+  - **column**: Column name, e.g., `"content"`.
+  - **do_lower_case**: Boolean to convert to lowercase.
+  - **truncation**: Boolean to truncate.
+  - **vocab_hash_file**: Path to the vocabulary hash file.
+- **model_name**: Name of the tokenizer model.
+
+#### Vector Database (VDB) Configuration
+
+- **batch_size**: Maximum number of embeddings to upload to the vector database in a single batch.
+- **embedding_size**: Size of the embeddings to store in the vector database.
+- **recreate**: Boolean to recreate the resource if it exists.
+- **resource_name**: Identifier for the resource in the vector database.
+- **service**: Type of vector database service (e.g., `"milvus"`).
+- **uri**: URI for connecting to the Vector Database server.
+
+## Additional Options
+
+Within one of the Morpheus containers, the `vdb_upload` command has its own set of options and commands:
+
+- `export-triton-model`
+- `langchain`
+- `pipeline`
+
+### Exporting and Deploying a Different Model from Huggingface
+
+If you're looking to incorporate a different embedding model from Huggingface into the pipeline, follow the steps below
+using `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` as an example:
+
+1. **Identify the Desired Model**:
+   - Head over to the [Huggingface Model Hub](https://huggingface.co/models) and search for the model you want. For
+     this example, we are looking at `sentence-transformers/paraphrase-multilingual-mpnet-base-v2`.
+
+2. **Run the Export Call with the Chosen Model**:
+   - Execute the following command with the model name you've identified:
+
+   ```bash
+   python examples/llm/main.py vdb_upload export-triton-model \
+     --model_name sentence-transformers/paraphrase-multilingual-mpnet-base-v2 \
+     --triton_repo ./models/triton-model-repo \
+     --output_model_name paraphrase-multilingual-mpnet-base-v2
+   ```
+
+3. **Handling Unauthorized Errors**:
+   - Please ensure you provide the correct model name.
+     A common pitfall is encountering an `unauthorized error`. If you see the following error:
+
+   ```text
+   requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url:
+   ```
+
+   This typically means the model name you provided does not match the one available on Huggingface. Double-check
+   the model name and try again.
+
+4. **Confirm Successful Model Export**:
+   - After running the command, ensure that the specified `--triton_repo` directory now contains the exported model in
+     the correct format, ready for deployment.
+
+   ```bash
+   ls /workspace/models/triton-model-repo | grep paraphrase-multilingual-mpnet-base-v2
+
+   paraphrase-multilingual-mpnet-base-v2
+   ```
+
+5. **Deploy the Model**:
+   - Leverage the Triton REST API to load this model.
+
+   ```bash
+   curl -X POST triton:8000/v2/repository/models/paraphrase-multilingual-mpnet-base-v2/load
+   ```
+
+   - Leverage the Triton REST API to validate that the model loaded. You should see something similar to the following, indicating Triton has successfully loaded the model:
+
+   ```bash
+   curl -X POST triton:8000/v2/repository/index
+
+   [{"name":"paraphrase-multilingual-mpnet-base-v2","version":"1","state":"READY"}]
+   ```
+
+6. **Update the Pipeline Call**:
+
+   - Now that the model has been exported and deployed, update the embedding model name in the YAML config and re-run the pipeline:
+
+   ```bash
+   python examples/llm/main.py vdb_upload pipeline \
+     --vdb_config_path "examples/llm/vdb_upload/kafka_config.yaml"
+   ```
diff --git a/experimental/streaming_ingest_rag/docker-compose.yml b/experimental/streaming_ingest_rag/docker-compose.yml
new file mode 100644
index 000000000..4e3b18609
--- /dev/null
+++ b/experimental/streaming_ingest_rag/docker-compose.yml
@@ -0,0 +1,289 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +version: '3.5' + +services: + + etcd: + restart: always + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.5 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - ./docker/volumes/etcd:/etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + networks: + - default + + minio: + restart: always + container_name: milvus-minio + image: minio/minio:RELEASE.2023-03-20T20-16-18Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - ./docker/volumes/minio:/minio_data + command: minio server /minio_data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + networks: + - default + + standalone: + restart: always + container_name: milvus-standalone + image: milvusdb/milvus:v2.3.5 + command: ["milvus", "run", "standalone"] + hostname: milvus + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + volumes: + - ./docker/volumes/milvus:/var/lib/milvus + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + networks: + - default + + attu: + restart: always + image: zilliz/attu:v2.3.5 + container_name: attu + hostname: attu + build: + context: ./docker/attu + dockerfile: Dockerfile + environment: + MILVUS_URL: http://milvus:19530 + ports: + - "3000:3000" + networks: + - default + + zookeeper: + image: confluentinc/cp-zookeeper:7.3.2 + hostname: zookeeper + container_name: zookeeper + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_SERVER_ID: 1 + ZOOKEEPER_SERVERS: zookeeper:2888:3888 + networks: + - default + + kafka: + restart: always + image: confluentinc/cp-kafka:7.3.2 + hostname: kafka + container_name: kafka + ports: + - "9092:9092" + - "29092:29092" + - "9999:9999" + environment: + KAFKA_ADVERTISED_LISTENERS: INTERNAL://kafka:19092,EXTERNAL://${DOCKER_HOST_IP:-127.0.0.1}:9092,DOCKER://host.docker.internal:29092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT,DOCKER:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INTERNAL + KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181" + KAFKA_BROKER_ID: 1 + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_JMX_PORT: 9999 + KAFKA_JMX_HOSTNAME: ${DOCKER_HOST_IP:-127.0.0.1} + KAFKA_AUTHORIZER_CLASS_NAME: kafka.security.authorizer.AclAuthorizer + KAFKA_ALLOW_EVERYONE_IF_NO_ACL_FOUND: "true" + depends_on: + - zookeeper + networks: + - default + + init-kafka: + image: confluentinc/cp-kafka:7.3.2 + depends_on: + - kafka + container_name: init-kafka + entrypoint: [ '/bin/sh', '-c' ] + command: | + " + # blocks until kafka is reachable + kafka-topics --bootstrap-server kafka:19092 --list + + echo -e 'Creating kafka topics' + kafka-topics --bootstrap-server kafka:19092 --create --if-not-exists --topic 
scrape_queue --replication-factor 1 --partitions 10 + kafka-topics --bootstrap-server kafka:19092 --create --if-not-exists --topic raw_queue --replication-factor 1 --partitions 10 + + echo -e 'Successfully created the following topics:' + kafka-topics --bootstrap-server kafka:19092 --list + " + networks: + - default + + streaming-ingest-dev: + restart: always + depends_on: + - kafka + build: + context: ./docker + dockerfile: Dockerfile.morpheus + target: jupyter + args: + - MORPHEUS_CONTAINER=${MORPHEUS_CONTAINER:-nvcr.io/nvidia/morpheus/morpheus} + - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-24.03-runtime} + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + image: streaming_ingest_morpheus_jupyter + container_name: streaming-ingest-dev + ports: + - "8888:8888" + networks: + - default + command: jupyter-lab --allow-root --ip=0.0.0.0 --port=8888 --no-browser --NotebookApp.token='' + volumes: + - ./docker/morpheus/models:/workspace/models + - ./docker/morpheus/examples:/workspace/examples + - ./morpheus_examples/streaming_ingest_rag:/workspace/examples/streaming_ingest_rag + cap_add: + - sys_nice + + ingest-worker: + restart: always + depends_on: + init-kafka: + condition: service_started + standalone: + condition: service_healthy + build: + context: ./docker + dockerfile: Dockerfile.morpheus + target: runtime + args: + - MORPHEUS_CONTAINER=${MORPHEUS_CONTAINER:-nvcr.io/nvidia/morpheus/morpheus} + - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-24.03-runtime} + image: streaming_ingest_morpheus + stdin_open: true + tty: true + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + networks: + - default + environment: + TERM: "${TERM:-}" + command: python examples/llm/main.py vdb_upload pipeline --vdb_config_path "examples/llm/vdb_upload/kafka_config.yaml" + volumes: + - ./docker/morpheus/models:/workspace/models + - ./docker/morpheus/examples:/workspace/examples + - ./morpheus_examples/streaming_ingest_rag/vdb_upload:/workspace/examples/llm/vdb_upload + cap_add: + - sys_nice + + producer: + build: + context: ../.. 
+ dockerfile: ./experimental/streaming_ingest_rag/docker/Dockerfile.producer + depends_on: + - init-kafka + - kafka + container_name: producer + image: streaming_ingest_producer + stdin_open: true + tty: true + environment: + TERM: "${TERM:-}" + N_KAFKA_TOPIC_PARTITIONS: 10 + networks: + - default + + triton: + restart: always + image: nvcr.io/nvidia/tritonserver:23.11-py3 + container_name: triton + hostname: triton + ports: + - "8000:8000" + - "8001:8001" + - "8002:8002" + volumes: + - ./docker/morpheus/models:/models + command: [ + "tritonserver", + "--model-repository=/models/triton-model-repo", + "--exit-on-error=false", + "--log-info=true", + "--strict-readiness=false", + "--disable-auto-complete-config", + "--model-control-mode=explicit", + "--load-model", + "all-MiniLM-L6-v2", + ] + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-f", "localhost:8000/v2/health/ready"] + interval: 30s + timeout: 20s + retries: 3 + networks: + - default + +networks: + default: + name: streaming-ingest diff --git a/experimental/streaming_ingest_rag/docker/Dockerfile.morpheus b/experimental/streaming_ingest_rag/docker/Dockerfile.morpheus new file mode 100644 index 000000000..219ed25ed --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/Dockerfile.morpheus @@ -0,0 +1,60 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +ARG MORPHEUS_CONTAINER=nvcr.io/nvidia/morpheus/morpheus +ARG MORPHEUS_CONTAINER_VERSION=24.03-runtime + +FROM ${MORPHEUS_CONTAINER}:${MORPHEUS_CONTAINER_VERSION} as base + +# # Fix the entrypoint to work with different WORKDIR +ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/workspace/docker/entrypoint.sh" ] + +SHELL ["/bin/bash", "-c"] + +# Copy the conda_env file +COPY ./conda/environments/conda_env.yaml ./conda/environments/conda_env.yaml + +# Install dependencies +RUN source activate morpheus \ + && conda env update --solver=libmamba -n morpheus --file ./conda/environments/conda_env.yaml \ + && conda clean -afy + +# Install additional packages +RUN apt update && apt install curl \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +# ===== Setup for running unattended ===== +FROM base as runtime + +# Launch morpheus +CMD ["/bin/bash"] + +# ===== Setup for running Jupyter ===== +FROM base as jupyter + +# Install the jupyter specific requirements +RUN source activate morpheus &&\ + mamba install -y -c conda-forge \ + ipywidgets \ + nb_conda_kernels \ + jupyter_contrib_nbextensions \ + # notebook v7 is incompatible with jupyter_contrib_nbextensions + notebook=6 &&\ + jupyter contrib nbextension install --user &&\ + pip install jupyterlab_nvdashboard==0.9 + +# Launch jupyter +CMD ["jupyter-lab", "--allow-root", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--NotebookApp.token=''"] diff --git a/experimental/streaming_ingest_rag/docker/Dockerfile.producer b/experimental/streaming_ingest_rag/docker/Dockerfile.producer new file mode 100644 index 000000000..cbaeef0d7 --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/Dockerfile.producer @@ -0,0 +1,32 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM ubuntu:22.04 + +WORKDIR /workspace/src + +ADD experimental/streaming_ingest_rag/producer/src . +ADD notebooks/dataset.zip data + +SHELL ["/bin/bash", "-c"] + +RUN apt-get update \ + && apt-get install -y python3-pip vim \ + && pip3 install \ + confluent-kafka==1.9.2 \ + jsonlines==4.0.0 \ + pymupdf==1.23.25 \ + && rm -rf /var/lib/apt/lists/* + +RUN python3 generate_dataset.py diff --git a/experimental/streaming_ingest_rag/docker/build_all.sh b/experimental/streaming_ingest_rag/docker/build_all.sh new file mode 100755 index 000000000..fddd5a9a3 --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/build_all.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Download Morpheus source and build base morpheus container + +./docker/build_morpheus.sh +./docker/build_attu.sh + +docker-compose build attu streaming-ingest-dev ingest-worker producer +docker-compose pull etcd minio standalone zookeeper kafka init-kafka triton diff --git a/experimental/streaming_ingest_rag/docker/build_attu.sh b/experimental/streaming_ingest_rag/docker/build_attu.sh new file mode 100755 index 000000000..eb12c2669 --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/build_attu.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Download attu source to build container + +SCRIPT_HOME=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ATTU_ROOT=$SCRIPT_HOME/attu + +git clone https://github.com/zilliztech/attu.git $ATTU_ROOT +cd $ATTU_ROOT +git checkout -b streaming-ingest-branch c42d59716a72c10033822358dbdd622c69b156fd diff --git a/experimental/streaming_ingest_rag/docker/build_morpheus.sh b/experimental/streaming_ingest_rag/docker/build_morpheus.sh new file mode 100755 index 000000000..b0dd511b6 --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/build_morpheus.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Download Morpheus source and build base morpheus container + +SCRIPT_HOME=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +MORPHEUS_ROOT=$SCRIPT_HOME/morpheus + +# Clone Morpheus github +git clone https://github.com/nv-morpheus/Morpheus.git $MORPHEUS_ROOT +cd $MORPHEUS_ROOT +git checkout -b streaming-ingest-branch aa8d42e79936bc7b2558682ca1197cedca8c7041 +# Add tag to name container +git tag 24.03 + +git lfs install +python3 ./scripts/fetch_data.py fetch models +cp ../extras/all-MiniLM-L6-v2_config.pbtxt ./models/triton-model-repo/all-MiniLM-L6-v2/config.pbtxt + +# Build container base image +./docker/build_container_release.sh + +cd $SCRIPT_HOME diff --git a/experimental/streaming_ingest_rag/docker/conda/environments/conda_env.yaml b/experimental/streaming_ingest_rag/docker/conda/environments/conda_env.yaml new file mode 100644 index 000000000..3eee24c83 --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/conda/environments/conda_env.yaml @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +channels: + - conda-forge + - defaults +dependencies: + - arxiv=1.4 + - onnx # required for triton model export + - pip + - pypdf=3.16 + - newspaper3k=0.2 + - requests-cache=1.1 + + ####### Pip Transitive Dependencies (keep sorted!) ####### + # These are dependencies that are available on conda, but are required by the pip packages listed below. Its much + # better to install them with conda than pip to allow for better dependency resolution. + - environs=9.5 + - minio=7.1 + - pydantic=2.4 + - python-dotenv=1.0 + - ujson=5.8 + + ####### Pip Dependencies (keep sorted!) 
####### + - pip: + - farm-haystack[file-conversion] + - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus + - langchain==0.0.310 + - pymilvus==2.3.2 # The conda package is woefully out of date and incorrect + - sentence_transformers==2.3.0 + - PyMuPDF==1.23.21 diff --git a/experimental/streaming_ingest_rag/docker/extras/all-MiniLM-L6-v2_config.pbtxt b/experimental/streaming_ingest_rag/docker/extras/all-MiniLM-L6-v2_config.pbtxt new file mode 100644 index 000000000..9d582dbc1 --- /dev/null +++ b/experimental/streaming_ingest_rag/docker/extras/all-MiniLM-L6-v2_config.pbtxt @@ -0,0 +1,45 @@ +name: "all-MiniLM-L6-v2" +platform: "onnxruntime_onnx" +max_batch_size: 256 +input { + name: "input_ids" + data_type: TYPE_INT32 + dims: 512 +} +input { + name: "attention_mask" + data_type: TYPE_INT32 + dims: 512 +} +output { + name: "output" + data_type: TYPE_FP32 + dims: 384 +} +dynamic_batching { + preferred_batch_size: 1 + preferred_batch_size: 2 + preferred_batch_size: 4 + preferred_batch_size: 8 + preferred_batch_size: 16 + preferred_batch_size: 32 + preferred_batch_size: 64 + preferred_batch_size: 128 + preferred_batch_size: 256 + max_queue_delay_microseconds: 50000 +} +optimization { + execution_accelerators { + gpu_execution_accelerator { + name: "tensorrt" + parameters { + key: "max_workspace_size_bytes" + value: "2147483648" + } + parameters { + key: "precision_mode" + value: "FP16" + } + } + } +} diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/__init__.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/__init__.py new file mode 100644 index 000000000..6c9841892 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .run import run + +__all__ = [ + "run", +] diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/export_model.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/export_model.py new file mode 100644 index 000000000..f7bd5796f --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/export_model.py @@ -0,0 +1,250 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import functools +import inspect +import logging +import os +import typing + +import torch +import torch.nn.functional as F +from sentence_transformers import SentenceTransformer +from transformers.models.auto.modeling_auto import AutoModel +from transformers.models.auto.tokenization_auto import AutoTokenizer +from transformers.models.bert.modeling_bert import BertModel +from tritonclient.grpc.model_config_pb2 import DataType +from tritonclient.grpc.model_config_pb2 import ModelConfig +from tritonclient.grpc.model_config_pb2 import ModelInput +from tritonclient.grpc.model_config_pb2 import ModelOptimizationPolicy +from tritonclient.grpc.model_config_pb2 import ModelOutput +from tritonclientutils import np_to_triton_dtype + +try: + import onnx +except ImportError as exc: + raise RuntimeError("Please install onnx to use this feature. Run `mamba install -c conda-forge onnx`") from exc + +logger = logging.getLogger(__name__) + + +class CustomTokenizer(torch.nn.Module): + # pylint: disable=abstract-method + def __init__(self, model_name: str): + super().__init__() + + self.inner_model = AutoModel.from_pretrained(model_name) + + if (isinstance(self.inner_model, SentenceTransformer)): + self._output_dim = self.inner_model.get_sentence_embedding_dimension() + elif (isinstance(self.inner_model, BertModel)): + self._output_dim = self.inner_model.config.hidden_size + + sig = inspect.signature(self.inner_model.forward) + + ordered_list_keys = list(sig.parameters.keys()) + if ordered_list_keys[0] == "self": + ordered_list_keys = ordered_list_keys[1:] + + # Save the idx of the attention mask because exporting prefers arguments over kwargs + self._attention_mask_idx = ordered_list_keys.index("attention_mask") + + # Wrap the original function so the export can find the original signature + @functools.wraps(self.inner_model.forward) + def forward(*args, **kwargs): + return self._forward(*args, **kwargs) + + self.forward = forward + + @property + def output_dim(self): + return self._output_dim + + # Mean Pooling - Take attention mask into account for correct averaging + def mean_pooling(self, model_output, attention_mask): + # Adapted from https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 + # First element of model_output contains all token embeddings + last_hidden_state = model_output["last_hidden_state"] # [batch_size, seq_length, hidden_size] + + alternate = True + + if (alternate): + last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0) + return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] + + # Transpose to make broadcasting possible + last_hidden_state = torch.transpose(last_hidden_state, 0, 2) # [hidden_size, seq_length, batch_size] + + input_mask_expanded = torch.transpose(attention_mask.unsqueeze(-1).float(), 0, 2) # [1, seq_length, batch_size] + + num = torch.sum(last_hidden_state * input_mask_expanded, 1) # [hidden_size, batch_size] + denom = torch.clamp(input_mask_expanded.sum(1), min=1e-9) # [1, batch_size] + + return torch.transpose(num / denom, 0, 1) # [batch_size, hidden_size] + + def normalize(self, embeddings): + + alternate = False + + if (alternate): + return F.normalize(embeddings, p=2, dim=1) + + # Use the same trick here to broadcast to avoid using the expand operator which breaks dynamic axes + denom = torch.transpose(embeddings.norm(2, 1, keepdim=True).clamp_min(1e-12), 0, 1) + + return torch.transpose(torch.transpose(embeddings, 0, 1) / denom, 0, 1) + + def _forward(self, *args, **kwargs): + + if ("attention_mask" in 
kwargs): + attention_mask = kwargs["attention_mask"] + elif (len(args) > self._attention_mask_idx): + # Lookup from positional + attention_mask = args[self._attention_mask_idx] + else: + raise RuntimeError("Cannot determine attention mask") + + model_outputs = self.inner_model(*args, **kwargs) + + sentence_embeddings = self.mean_pooling(model_outputs, attention_mask) + + sentence_embeddings = self.normalize(sentence_embeddings) + + return sentence_embeddings + + +def _save_model(model, sample_input: dict, output_model_path: str): + + # Ensure our input is a dictionary, not a batch encoding + args = (dict(sample_input.items()), ) + + inspect.signature(model.forward) + + torch.onnx.export( + model, + args, + output_model_path, + opset_version=13, + input_names=['input_ids', 'attention_mask'], + output_names=['output'], + dynamic_axes={ + 'input_ids': { + 0: 'batch_size', + 1: "seq_length", + }, # variable length axes + 'attention_mask': { + 0: 'batch_size', + 1: "seq_length", + }, + 'output': { + 0: 'batch_size', + } + }, + verbose=False) + + onnx_model = onnx.load(output_model_path) + + onnx.checker.check_model(onnx_model) + + +def build_triton_model(model_name, model_seq_length, max_batch_size, triton_repo, output_model_name): + + if (output_model_name is None): + output_model_name = model_name + + device = torch.device("cuda") + + model_name = f'{model_name}' + + model = CustomTokenizer(model_name) + model.to(device) + model.eval() + + tokenizer = AutoTokenizer.from_pretrained(model_name) + + test_texts = [ + "This is text one which is longer", + "This is text two", + ] + + sample_input = tokenizer(test_texts, + max_length=model_seq_length, + padding="max_length", + truncation=True, + return_token_type_ids=False, + return_tensors="pt").to(device) + + test_output = model(**(sample_input.to(device))).detach() + + output_model_dir = os.path.join(triton_repo, output_model_name) + + # Make sure we create the directory if it does not exist + os.makedirs(output_model_dir, exist_ok=True) + + # Make the config file + config: typing.Any = typing.cast(typing.Any, ModelConfig()) + + config.name = output_model_name + config.platform = "onnxruntime_onnx" + config.max_batch_size = max_batch_size + + # pylint: disable=no-member + for input_name, input_data in sample_input.data.items(): + + config.input.append( + ModelInput( + name=input_name, + data_type=DataType.Value(f"TYPE_{np_to_triton_dtype(input_data.cpu().numpy().dtype)}"), + dims=[input_data.shape[1]], + )) + + config.output.append( + ModelOutput( + name="output", + data_type=DataType.Value(f"TYPE_{np_to_triton_dtype(test_output.cpu().numpy().dtype)}"), + dims=[test_output.shape[1]], + )) + + def _powers_of_2(max_val: int): + val = 1 + + while (val <= max_val): + yield val + val *= 2 + + config.dynamic_batching.preferred_batch_size.extend(x for x in _powers_of_2(max_batch_size)) + config.dynamic_batching.max_queue_delay_microseconds = 50000 + + config.optimization.execution_accelerators.gpu_execution_accelerator.extend([ + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator(name="tensorrt", + parameters={ + "precision_mode": "FP16", + "max_workspace_size_bytes": "2147483648", + }) + ]) + + config_path = os.path.join(output_model_dir, "config.pbtxt") + + with open(config_path, "w", encoding="utf-8") as f: + f.write(str(config)) + + model_version_dir = os.path.join(output_model_dir, "1") + + os.makedirs(model_version_dir, exist_ok=True) + + output_model_path = os.path.join(model_version_dir, "model.onnx") + + _save_model(model, sample_input, 
output_model_path=output_model_path) + + logger.info("Created Triton Model at %s", output_model_dir) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/helper.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/helper.py new file mode 100644 index 000000000..913391cd2 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/helper.py @@ -0,0 +1,233 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import typing + +from morpheus.config import Config +from morpheus.messages import ControlMessage +from morpheus.pipeline.pipeline import Pipeline +from morpheus.stages.general.linear_modules_source import LinearModuleSourceStage +from morpheus.stages.input.kafka_source_stage import KafkaSourceStage +from morpheus.stages.input.kafka_source_stage import AutoOffsetReset +from morpheus.utils.module_utils import ModuleLoaderFactory + +from .module.file_source_pipe import FileSourcePipeLoaderFactory +from .module.rss_source_pipe import RSSSourcePipeLoaderFactory +from .module.kafka_source_pipe import KafkaSourcePipeLoaderFactory + +logger = logging.getLogger(__name__) + + +def validate_source_config(source_info: typing.Dict[str, any]) -> None: + """ + Validates the configuration of a source. + + This function checks whether the given source configuration dictionary + contains all required keys: 'type', 'name', and 'config'. + + Parameters + ---------- + source_info : typing.Dict[str, any] + The source configuration dictionary to validate. + + Raises + ------ + ValueError + If any of the required keys ('type', 'name', 'config') are missing + in the source configuration. + """ + if ('type' not in source_info or 'name' not in source_info or 'config' not in source_info): + raise ValueError(f"Each source must have 'type', 'name', and 'config':\n {source_info}") + + +def setup_rss_source(pipe: Pipeline, config: Config, source_name: str, rss_config: typing.Dict[str, typing.Any]): + """ + Set up the RSS source stage in the pipeline. + + Parameters + ---------- + pipe : Pipeline + The pipeline to which the RSS source stage will be added. + config : Config + Configuration object for the pipeline. + source_name : str + The name of the RSS source stage. + rss_config : typing.Dict[str, Any] + Configuration parameters for the RSS source stage. + + Returns + ------- + SubPipeline + The sub-pipeline stage created for the RSS source. 
+ """ + module_definition = RSSSourcePipeLoaderFactory.get_instance( + module_name=f"rss_source_pipe__{source_name}", + module_config={"rss_config": rss_config}, + ) + rss_pipe = pipe.add_stage( + LinearModuleSourceStage(config, module_definition, output_type=ControlMessage, output_port_name="output")) + + return rss_pipe + + +def setup_filesystem_source(pipe: Pipeline, config: Config, source_name: str, fs_config: typing.Dict[str, typing.Any]): + """ + Set up the filesystem source stage in the pipeline. + + Parameters + ---------- + pipe : Pipeline + The pipeline to which the filesystem source stage will be added. + config : Config + Configuration object for the pipeline. + source_name : str + The name of the filesystem source stage. + fs_config : typing.Dict[str, Any] + Configuration parameters for the filesystem source stage. + + Returns + ------- + SubPipeline + The sub-pipeline stage created for the filesystem source. + """ + + module_loader = FileSourcePipeLoaderFactory.get_instance(module_name=f"file_source_pipe__{source_name}", + module_config={"file_source_config": fs_config}) + file_pipe = pipe.add_stage( + LinearModuleSourceStage(config, module_loader, output_type=ControlMessage, output_port_name="output")) + + return file_pipe + +def setup_kafka_source(pipe: Pipeline, config: Config, source_name: str, type_config: typing.Dict[str, typing.Any]): + """ + Set up the kafka source stage in the pipeline. + + Parameters + ---------- + pipe : Pipeline + The pipeline to which the filesystem source stage will be added. + config : Config + Configuration object for the pipeline. + source_name : str + The name of the filesystem source stage. + fs_config : typing.Dict[str, Any] + Configuration parameters for the filesystem source stage. + + Returns + ------- + SubPipeline + The sub-pipeline stage created for the kafka source. + """ + + module_definition = KafkaSourcePipeLoaderFactory.get_instance( + module_name=f"kafka_source_pipe__{source_name}", + module_config={"kafka_config": type_config}, + ) + + kafka_pipe = pipe.add_stage( + LinearModuleSourceStage(config, module_definition, output_type=ControlMessage, output_port_name="output")) + + return kafka_pipe + +def setup_custom_source(pipe: Pipeline, config: Config, source_name: str, custom_config: typing.Dict[str, typing.Any]): + """ + Setup a custom source stage in the pipeline. + + Parameters + ---------- + pipe : Pipeline + The pipeline to which the custom source stage will be added. + config : Config + Configuration object for the pipeline. + source_name : str + The name of the custom source stage. + custom_config : typing.Dict[str, Any] + Configuration parameters for the custom source stage, including + the module_id, module_name, namespace, and any additional parameters. + + Returns + ------- + SubPipeline + The sub-pipeline stage created for the custom source. 
+ """ + + module_id = custom_config.pop('module_id') + module_name = f"{module_id}__{source_name}" + module_namespace = custom_config.pop('namespace') + module_output_id = custom_config.pop('module_output_id', 'output') + + module_config = { + "module_id": module_id, + "module_name": module_name, + "namespace": module_namespace, + } + + config_name_mapping = custom_config.pop('config_name_mapping', 'config') + module_config[config_name_mapping] = custom_config + + # Adding the custom module stage to the pipeline + custom_pipe = pipe.add_stage( + LinearModuleSourceStage(config, module_config, output_type=ControlMessage, output_port_name=module_output_id)) + + return custom_pipe + + +def process_vdb_sources(pipe: Pipeline, config: Config, vdb_source_config: typing.List[typing.Dict]) -> typing.List: + """ + Processes and sets up sources defined in a vdb_source_config. + + This function reads the source configurations provided in vdb_source_config and + sets up each source based on its type ('rss', 'filesystem', or 'custom'). + It validates each source configuration and then calls the appropriate setup + function to add the source to the pipeline. + + Parameters + ---------- + pipe : Pipeline + The pipeline to which the sources will be added. + config : Config + Configuration object for the pipeline. + vdb_source_config : List[Dict] + A list of dictionaries, each containing the configuration for a source. + + Returns + ------- + list + A list of the sub-pipeline stages created for each defined source. + + Raises + ------ + ValueError + If an unsupported source type is encountered in the configuration. + """ + vdb_sources = [] + for source_info in vdb_source_config: + validate_source_config(source_info) + source_type = source_info['type'] + source_name = source_info['name'] + source_config = source_info['config'] + + if (source_type == 'rss'): + vdb_sources.append(setup_rss_source(pipe, config, source_name, source_config)) + elif (source_type == 'filesystem'): + vdb_sources.append(setup_filesystem_source(pipe, config, source_name, source_config)) + elif (source_type == 'kafka'): + vdb_sources.append(setup_kafka_source(pipe, config, source_name, source_config)) + elif (source_type == 'custom'): + vdb_sources.append(setup_custom_source(pipe, config, source_name, source_config)) + else: + raise ValueError(f"Unsupported source type: {source_type}") + + return vdb_sources diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml new file mode 100644 index 000000000..95b779728 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml @@ -0,0 +1,197 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +vdb_pipeline: + embeddings: + isolate_embeddings: false + model_kwargs: + force_convert_inputs: true + model_name: "all-MiniLM-L6-v2" + server_url: "triton:8001" + use_shared_memory: false + + pipeline: + edge_buffer_size: 128 + feature_length: 512 + max_batch_size: 64 + embedding_size: 384 + + sources: + - type: kafka + name: "web_scrape" + config: + stage_config: + enable_monitor: true + namespace: "morpheus_examples_llm" + module_id: "kafka_source_pipe" + module_output_id: "output" + transform_type: web_scraper + deserialize_config: + output_batch_size: 2048 # Number of chunked documents per output batch + kafka_config: + max_batch_size: 64 + bootstrap_servers: "kafka:19092" + input_topic: "scrape_queue" + group_id: "morpheus" + poll_interval: "10millis" + disable_commit: false + disable_pre_filtering: false + auto_offset_reset: "latest" + stop_after: 0 + async_commits: true + web_scraper_config: + chunk_overlap: 51 + chunk_size: 512 + enable_cache: false + cache_path: "./.cache/llm/html/WebScrapeModule.sqlite" + cache_dir: "./.cache/llm/html" + link_column: "payload" + vdb_config: + vdb_resource_name: "vdb_kafka_scrape" + + - type: kafka + name: "raw_chunk" + config: + stage_config: + enable_monitor: true + run_indefinitely: true # TODO map to kafka source + namespace: "morpheus_examples_llm" + module_id: "kafka_source_pipe" + module_output_id: "output" + transform_type: raw_chunker + deserialize_config: + output_batch_size: 2048 # Number of chunked documents per output batch + kafka_config: + max_batch_size: 256 + bootstrap_servers: "kafka:19092" + input_topic: "raw_queue" + group_id: "morpheus" + poll_interval: "10millis" + disable_commit: false + disable_pre_filtering: false + auto_offset_reset: "latest" + stop_after: 0 + async_commits: true + raw_chunker_config: + chunk_overlap: 51 + chunk_size: 512 + payload_column: "payload" + vdb_config: + vdb_resource_name: "vdb_kafka_raw" + + tokenizer: + model_kwargs: + add_special_tokens: false + column: "content" + do_lower_case: true + truncation: true + vocab_hash_file: "data/bert-base-uncased-hash.txt" + model_name: "bert-base-uncased-hash" + + vdb: + batch_size: 16384 # Vector DB max batch size + resource_name: "vdb_kafka_raw" # Identifier for the resource in the vector database + embedding_size: 384 + write_time_interval: 20 # Max time between successive uploads + recreate: False # Whether to recreate the resource if it already exists + service: "milvus" # Specify the type of vector database + uri: "http://milvus:19530" # URI for connecting to the Vector Database server + resource_schemas: + vdb_kafka_scrape: + index_conf: + field_name: embedding + metric_type: L2 + index_type: HNSW + params: + M: 8 + efConstruction: 64 + + vdb_kafka_raw: + index_conf: + field_name: embedding + metric_type: L2 + index_type: HNSW + params: + M: 8 + efConstruction: 64 + + schema_conf: + enable_dynamic_field: true + schema_fields: + - name: id + dtype: INT64 + description: Primary key for the collection + is_primary: true + auto_id: true + - name: title + dtype: VARCHAR + description: Title or heading of the data entry + max_length: 65_535 + - name: source + dtype: VARCHAR + description: Source or origin of the data entry + max_length: 65_535 + - name: summary + dtype: VARCHAR + description: Brief summary or abstract of the data content + max_length: 65_535 + - name: content + dtype: VARCHAR + description: Main content or body of the data entry + max_length: 65_535 + - name: embedding + dtype: FLOAT_VECTOR + description: Embedding vectors representing 
the data entry + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources + + vdb_kafka_scrape: + index_conf: + field_name: embedding + metric_type: L2 + index_type: HNSW + params: + M: 8 + efConstruction: 64 + + schema_conf: + enable_dynamic_field: true + schema_fields: + - name: id + dtype: INT64 + description: Primary key for the collection + is_primary: true + auto_id: true + - name: title + dtype: VARCHAR + description: Title or heading of the data entry + max_length: 65_535 + - name: source + dtype: VARCHAR + description: Source or origin of the data entry + max_length: 65_535 + - name: summary + dtype: VARCHAR + description: Brief summary or abstract of the data content + max_length: 65_535 + - name: content + dtype: VARCHAR + description: Main content or body of the data entry + max_length: 65_535 + - name: embedding + dtype: FLOAT_VECTOR + description: Embedding vectors representing the data entry + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources + \ No newline at end of file diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/langchain.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/langchain.py new file mode 100644 index 000000000..b494750a3 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/langchain.py @@ -0,0 +1,59 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import pickle + +# pylint: disable=no-name-in-module +from langchain.document_loaders.rss import RSSFeedLoader +from langchain.embeddings.huggingface import HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.vectorstores.milvus import Milvus + +from examples.llm.vdb_upload.vdb_utils import build_rss_urls +from morpheus.utils.logging_timer import log_time + +logger = logging.getLogger(__name__) + + +def chain(model_name, save_cache): + with log_time(msg="Seeding with chain took {duration} ms. {rate_per_sec} docs/sec", log_fn=logger.debug) as log: + loader = RSSFeedLoader(urls=build_rss_urls()) + + documents = loader.load() + + if (save_cache is not None): + with open(save_cache, "wb") as f: + pickle.dump(documents, f) + + text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20, length_function=len) + + documents = text_splitter.split_documents(documents) + + log.count = len(documents) + + logger.info("Loaded %s documents", len(documents)) + + embeddings = HuggingFaceEmbeddings( + model_name=model_name, + model_kwargs={'device': 'cuda'}, + encode_kwargs={ + # 'normalize_embeddings': True, # set True to compute cosine similarity + "batch_size": 100, + }) + + with log_time(msg="Adding to Milvus took {duration} ms. Doc count: {count}. 
{rate_per_sec} docs/sec", + count=log.count, + log_fn=logger.debug): + Milvus.from_documents(documents, embeddings, collection_name="LangChain", drop_old=True) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/__init__.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/__init__.py new file mode 100644 index 000000000..237a16585 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from vdb_upload.module.web_scraper_module import WebScraperLoaderFactory as web_scraper +from vdb_upload.module.raw_chunker_module import RawChunkerLoaderFactory as raw_chunker + +__all__ = [ + "web_scraper", + "raw_chunker" +] diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/content_extractor_module.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/content_extractor_module.py new file mode 100644 index 000000000..3ad44fc77 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/content_extractor_module.py @@ -0,0 +1,379 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
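+
+"""
+File content extractor module.
+
+Reads batches of files via fsspec, converts PDF, DOCX, CSV and plain-text
+payloads to text, splits the text into overlapping chunks, and emits the
+result as a MessageMeta DataFrame for downstream embedding.
+"""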
+ +import io +import logging +import os +import typing +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from functools import wraps +from typing import Dict +from typing import List + +import fitz +import fsspec +import mrc +import mrc.core.operators as ops +import pandas as pd +from docx import Document +from langchain.text_splitter import RecursiveCharacterTextSplitter +from pydantic import BaseModel # pylint: disable=no-name-in-module +from pydantic import Field +from pydantic import ValidationError +from pydantic import validator + +from morpheus.messages import MessageMeta +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module + + +class CSVConverterSchema(BaseModel): + chunk_overlap: int = 102 # Example default value + chunk_size: int = 1024 + text_column_names: List[str] + + class Config: + extra = "forbid" + + +class ContentExtractorSchema(BaseModel): + batch_size: int = 32 + chunk_overlap: int = 51 + chunk_size: int = 512 + converters_meta: Dict[str, Dict] = Field(default_factory=dict) + num_threads: int = 10 + + @validator('converters_meta', pre=True, allow_reuse=True) + def val_converters_meta(cls, to_validate: Dict[str, Dict]) -> Dict[str, Dict]: # pylint: disable=no-self-argument + validated_meta = {} + for key, value in to_validate.items(): + if key.lower() == 'csv': + validated_meta[key] = CSVConverterSchema(**value) + else: + validated_meta[key] = value + return validated_meta + + class Config: + extra = "forbid" + + +logger = logging.getLogger(__name__) + +ContentExtractorLoaderFactory = ModuleLoaderFactory("file_content_extractor", + "morpheus_examples_llm", + ContentExtractorSchema) + + +@dataclass +class FileMeta: + file_path: str + file_name: str + file_type: str + + +@dataclass +class ConverterInputInfo: + io_bytes: io.BytesIO + meta: dict + + +def get_file_meta(open_file: fsspec.core.OpenFile) -> FileMeta: + """ + Extract file metadata from the given open file. + + Parameters + ---------- + open_file: fsspec.core.OpenFile + OpenFile object + + Returns + ------- + FileMeta + Returns FileMeta instance. + """ + try: + file_path = open_file.path + file_name = os.path.basename(file_path) + _, file_type = os.path.splitext(file_name) + + if len(file_type) > 0: + file_type = file_type.lstrip('.') + else: + file_type = 'none' + + return FileMeta(file_path=file_path, file_name=file_name, file_type=file_type) + + except Exception as e: + logger.error("Error retrieving file metadata for %s: %s", open_file.path, e) + raise + + +def read_file_to_bytesio(file_path: str) -> io.BytesIO: + """ + Read the content of the file and return it as an io.BytesIO object. + + Parameters + ---------- + file_path: str + Path to the file. + + Returns + ------- + io.BytesIO or None + Returns io.BytesIO object if the file is successfully read. Returns + None if there is an error reading the file. 
+    """
+
+    io_bytes = None
+
+    try:
+        with open(file_path, 'rb') as file:
+            io_bytes = io.BytesIO(file.read())
+    except FileNotFoundError:
+        logger.error("Error: File not found - %s", file_path)
+    except PermissionError:
+        logger.error("Error: Permission denied - %s", file_path)
+    except Exception as e:
+        logger.error("Error reading file %s: %s", file_path, e)
+
+    return io_bytes
+
+
+def _converter_error_handler(func: typing.Callable) -> typing.Callable:
+
+    @wraps(func)
+    def wrapper(input_info: ConverterInputInfo, *args, **kwargs):
+        try:
+            # Common logic for instance check
+            if not isinstance(input_info.io_bytes, io.BytesIO):
+                raise ValueError("Invalid input type. Supported type: io.BytesIO.")
+
+            return func(input_info, *args, **kwargs)
+        except Exception as exec_info:
+            logger.error("Error in %s: %s", func.__name__, exec_info)
+            # On failure, return an empty instance of the converter's declared return type.
+            return func.__annotations__.get("return", str)()
+
+    return wrapper
+
+
+@_converter_error_handler
+def _pdf_to_text_converter(input_info: ConverterInputInfo) -> str:
+    text = ""
+    pdf_document = fitz.open(stream=input_info.io_bytes, filetype="pdf")
+    for page_num in range(pdf_document.page_count):
+        page = pdf_document[page_num]
+        text += page.get_text()
+    return text
+
+
+@_converter_error_handler
+def _docx_to_text_converter(input_info: ConverterInputInfo) -> str:
+    text = ""
+    doc = Document(io.BytesIO(input_info.io_bytes.read()))
+    text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
+    return text
+
+
+@_converter_error_handler
+def _csv_to_text_converter(input_info: ConverterInputInfo) -> list[str]:
+    text_arr = []
+    text_column_names = {"content"}
+    if input_info.meta is not None:
+        text_column_names = set(input_info.meta.get("csv", {}).get("text_column_names", text_column_names))
+    df = pd.read_csv(input_info.io_bytes)
+    if len(df.columns) == 0 or (not text_column_names.issubset(set(df.columns))):
+        raise ValueError("The CSV file must either include a 'content' column or have the "
+                         "columns specified in the meta configuration with key 'text_column_names'.")
+    df.fillna(value='', inplace=True)
+    text_arr = df[sorted(text_column_names)].apply(lambda x: ' '.join(map(str, x)), axis=1).tolist()
+    return text_arr
+
+
+@_converter_error_handler
+def _text_converter(input_info: ConverterInputInfo) -> str:
+    text = ""
+    convertor_conf = input_info.meta.get("txt", {})
+    encoding = convertor_conf.get("encoding", "utf-8")
+    input_info.io_bytes.seek(0)
+    text = input_info.io_bytes.read().decode(encoding)
+    return text
+
+
+def process_content(docs: str | list[str], file_meta: FileMeta, chunk_size: int, chunk_overlap: int) -> list[dict]:
+    """
+    Processes the content of a file and splits it into chunks.
+
+    Parameters
+    ----------
+    docs : str | list[str]
+        Documents content.
+    file_meta: FileMeta
+        FileMeta parsed information of a file path.
+    chunk_size : int
+        Size of each chunk.
+    chunk_overlap : int
+        Overlap between consecutive chunks.
+
+    Returns
+    -------
+    list of dicts
+        A list of dictionaries, each with a chunk of content and file metadata.
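+
+    Examples
+    --------
+    Each returned dictionary has the shape below (values are illustrative)::
+
+        {
+            'title': 'report.pdf',
+            'source': 'pdf:/data/report.pdf',
+            'summary': 'none',
+            'content': '<one chunk of the document text>',
+        }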
+ """ + + text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + length_function=len) + + processed_data = [] + + if isinstance(docs, str): + docs = [docs] + + for document in docs: + try: + split_text = text_splitter.split_text(document) + + for chunk in split_text: + processed_data.append({ + 'title': file_meta.file_name, + 'source': f"{file_meta.file_type}:{file_meta.file_path}", + 'summary': 'none', + 'content': chunk + }) + + except Exception as e: + logger.error("Error processing file %s content: %s", file_meta.file_path, e) + continue + + return processed_data + + +@register_module("file_content_extractor", "morpheus_examples_llm") +def file_content_extractor(builder: mrc.Builder): + """ + Extracts text from PDF and TXT files and constructs a DataFrame with the extracted content. + + This module processes a batch of files, reading their contents and extracting text data to form a DataFrame. + It can handle both PDF and TXT files. The module uses a ThreadPoolExecutor for parallel file reading. + + Parameters + ---------- + builder : mrc.Builder + The Morpheus builder instance to attach this module to. + + Notes + ----- + The `module_config` should contain: + - 'batch_size': int, the number of files to process in parallel. + - 'num_threads': int, the number of threads to use for parallel file reading. + - 'chunk_size' : int, size of each chunk of document. + - 'chunk_overlap' : int, overlap between consecutive chunks. + - 'converters_meta' : dict, converters configuration. + + The function reads files in parallel but processes the content serially within each batch to prevent CPU contention. + + Example `module_config` + ----------------------- + { + "batch_size": 32, + "num_threads": 10 + } + """ + module_config = builder.get_current_module_config() + + try: + extractor_config = ContentExtractorSchema(**module_config) + except ValidationError as e: + # Format the error message for better readability + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid configuration for file_content_extractor: {error_messages}" + logger.error(log_error_message) + + raise + + # Use validated configurations + batch_size = extractor_config.batch_size + num_threads = extractor_config.num_threads + chunk_size = extractor_config.chunk_size + chunk_overlap = extractor_config.chunk_overlap + converters_meta = extractor_config.converters_meta + + converters = { + "pdf": _pdf_to_text_converter, + "csv": _csv_to_text_converter, + "docx": _docx_to_text_converter, + "txt": _text_converter + } + + chunk_params = { + file_type: { + "chunk_size": converters_meta.get(file_type, {}).get("chunk_size", chunk_size), + "chunk_overlap": converters_meta.get(file_type, {}).get("chunk_overlap", chunk_overlap) + } + for file_type in converters + } + + def parse_files(open_files: typing.List[fsspec.core.OpenFile]) -> MessageMeta: + data = [] + _fs = fsspec.filesystem(protocol='file') + + with ThreadPoolExecutor(max_workers=num_threads) as executor: + for i in range(0, len(open_files), batch_size): + batch = open_files[i:i + batch_size] + futures = [] + files_meta = [] + + for open_file in batch: + # Check if file exists + if (not _fs.exists(open_file.path)): + logger.warning("File does not exist: %s. Skipping...", open_file.path) + continue + + if (_fs.isdir(open_file.path)): + logger.warning("File is a directory: %s. 
Skipping...", open_file.path) + continue + + try: + file_meta: FileMeta = get_file_meta(open_file=open_file) + futures.append(executor.submit(read_file_to_bytesio, file_meta.file_path)) + files_meta.append(file_meta) + + except Exception as e: + logger.error("Error processing file %s: %s", open_file.path, e) + + for file_meta, future in zip(files_meta, futures): + io_bytes = future.result() + + if io_bytes: + converter = converters.get(file_meta.file_type, _text_converter) + input_info = ConverterInputInfo(io_bytes=io_bytes, meta=converters_meta) + result = converter(input_info) + # Get chunk params for the file type, default to txt + file_type_chunk_params = chunk_params[ + file_meta.file_type] if file_meta.file_type in chunk_params else chunk_params['txt'] + result = process_content(result, + file_meta, + file_type_chunk_params["chunk_size"], + file_type_chunk_params["chunk_overlap"]) + if result: + data.extend(result) + + df_final = pd.DataFrame(data) + + return MessageMeta(df=df_final) + + node = builder.make_node("text_extractor", ops.map(parse_files), ops.filter(lambda x: x is not None)) + builder.register_module_input("input", node) + builder.register_module_output("output", node) \ No newline at end of file diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/file_source_pipe.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/file_source_pipe.py new file mode 100644 index 000000000..62cbf484d --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/file_source_pipe.py @@ -0,0 +1,163 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import mrc +from pydantic import ValidationError +from vdb_upload.module.schema_transform import SchemaTransformLoaderFactory +from vdb_upload.schemas.file_source_pipe_schema import FileSourcePipeSchema + +from morpheus.modules.general.monitor import MonitorLoaderFactory +from morpheus.modules.input.multi_file_source import MultiFileSourceLoaderFactory +from morpheus.modules.preprocess.deserialize import DeserializeLoaderFactory +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module + +from vdb_upload.module.content_extractor_module import ContentExtractorLoaderFactory +from vdb_upload.module.vdb_resource_tagging_module import VDBResourceTaggingLoaderFactory + +logger = logging.getLogger(__name__) + +FileSourcePipeLoaderFactory = ModuleLoaderFactory("file_source_pipe", "morpheus_examples_llm", FileSourcePipeSchema) + + +@register_module("file_source_pipe", "morpheus_examples_llm") +def _file_source_pipe(builder: mrc.Builder): + """ + Sets up a pipeline for processing file sources. + + This function configures a pipeline that reads files, processes their content + based on specified configurations, and outputs the processed data. 
It integrates modules for + multi-file sourcing, file content extraction, and schema transformation, along with monitoring + at various stages. + + Parameters + ---------- + builder : mrc.Builder + The Morpheus builder to which the pipeline modules will be added. + + Notes + ----- + The module configuration can include the following parameters: + + - **file_source_config**: Configuration for the file source module. + - **batch_size**: Number of files to process in each batch. + - **chunk_overlap**: Overlap size for chunks in file processing. + - **chunk_size**: Size of chunks for file processing. + - **converters_meta**: Metadata for file format converters. + - **csv**: Configuration for CSV files. + - **chunk_size**: Chunk size for CSV processing. + - **text_column_name**: Name of the text column in CSV files. + - **enable_monitor**: Boolean to enable monitoring for this module. + - **extractor_config**: Configuration for the file content extractor module. + - **chunk_size**: Size of chunks for the extractor. + - **num_threads**: Number of threads for file content extraction. + - **filenames**: List of file paths to be processed. + - **watch**: Boolean to watch for file changes. + + The pipeline connects these modules in the following order: + Multi-File Source -> File Content Extractor -> Schema Transform -> Deserialize, + with monitoring at each stage. + """ + + module_config = builder.get_current_module_config() + file_source_config = module_config.get("file_source_config", {}) + try: + validated_config = FileSourcePipeSchema(**file_source_config) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid file source configuration: {error_messages}" + logger.error(log_error_message) + raise ValueError(log_error_message) + + # Use the validated configuration + enable_monitor = validated_config.enable_monitor + + # Configure and load the multi-file source module + source_config = { + "batch_size": validated_config.batch_size, + "filenames": validated_config.filenames, + "watch_interval": validated_config.watch_interval, + "watch_dir": validated_config.watch, + } + multi_file_loader = MultiFileSourceLoaderFactory.get_instance("multi_file_source", {"source_config": source_config}) + + # Configure and load the file content extractor module + file_content_extractor_config = { + "batch_size": validated_config.batch_size, + "num_threads": validated_config.num_threads, + "chunk_size": validated_config.chunk_size, + "chunk_overlap": validated_config.chunk_overlap, + "converters_meta": validated_config.converters_meta + } + extractor_loader = ContentExtractorLoaderFactory.get_instance("file_content_extractor", + file_content_extractor_config) + + # Configure and load the schema transformation module + transform_config = { + "schema_transform_config": { + "summary": { + "dtype": "str", "op_type": "select" + }, + "title": { + "dtype": "str", "op_type": "select" + }, + "content": { + "dtype": "str", "op_type": "select" + }, + "source": { + "dtype": "str", "op_type": "select" + } + } + } + schema_transform_loader = SchemaTransformLoaderFactory.get_instance("schema_transform", transform_config) + + deserialize_loader = DeserializeLoaderFactory.get_instance( + "deserialize", { + "batch_size": validated_config.batch_size, "message_type": "ControlMessage" + }) + + vdb_resource_tagging_loader = VDBResourceTaggingLoaderFactory.get_instance( + "vdb_resource_tagging", {"vdb_resource_name": 
validated_config.vdb_resource_name}) + + monitor_1_loader = MonitorLoaderFactory.get_instance( + "monitor_1", { + "description": "FileSourcePipe Transform", "silence_monitors": not enable_monitor + }) + + monitor_2_loader = MonitorLoaderFactory.get_instance( + "monitor_2", { + "description": "File Source Deserialize", "silence_monitors": not enable_monitor + }) + + # Load modules + multi_file_module = multi_file_loader.load(builder=builder) + file_content_extractor_module = extractor_loader.load(builder=builder) + transform_module = schema_transform_loader.load(builder=builder) + monitor_1_module = monitor_1_loader.load(builder=builder) + deserialize_module = deserialize_loader.load(builder=builder) + vdb_resource_tagging_module = vdb_resource_tagging_loader.load(builder=builder) + monitor_2_module = monitor_2_loader.load(builder=builder) + + # Connect the modules in the pipeline + builder.make_edge(multi_file_module.output_port("output"), file_content_extractor_module.input_port("input")) + builder.make_edge(file_content_extractor_module.output_port("output"), transform_module.input_port("input")) + builder.make_edge(transform_module.output_port("output"), monitor_1_module.input_port("input")) + builder.make_edge(monitor_1_module.output_port("output"), deserialize_module.input_port("input")) + builder.make_edge(deserialize_module.output_port("output"), vdb_resource_tagging_module.input_port("input")) + builder.make_edge(vdb_resource_tagging_module.output_port("output"), monitor_2_module.input_port("input")) + + # Register the final output of the transformation module + builder.register_module_output("output", monitor_2_module.output_port("output")) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/kafka_source_module.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/kafka_source_module.py new file mode 100644 index 000000000..c75d7659d --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/kafka_source_module.py @@ -0,0 +1,202 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
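+
+"""
+Kafka source module.
+
+Subscribes to one or more Kafka topics with confluent-kafka, batches the
+received messages, parses their JSON payloads into cuDF DataFrames, and
+emits each batch as a MessageMeta for the rest of the ingest pipeline.
+"""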
+ +import logging +from functools import partial +import time +import os +import typing +from enum import Enum +from io import StringIO + +import confluent_kafka as ck +import mrc +import pandas as pd +from pydantic import ValidationError + +import cudf + +from morpheus.messages import MessageMeta +from morpheus.utils.module_utils import ModuleLoaderFactory +from vdb_upload.schemas.kafka_source_schema import KafkaSourceSchema +from morpheus.utils.module_utils import register_module + +logger = logging.getLogger(__name__) + +KafkaSourceLoaderFactory = ModuleLoaderFactory("kafka_source", "morpheus_examples_llm", KafkaSourceSchema) + + +class AutoOffsetReset(Enum): + """The supported offset options in Kafka""" + EARLIEST = "earliest" + LATEST = "latest" + NONE = "none" + + +@register_module("kafka_source", "morpheus_examples_llm") +def _kafka_source_pipe(builder: mrc.Builder): + + module_config = builder.get_current_module_config() + + # Validate the module configuration using the contract + try: + kafka_config = KafkaSourceSchema(**module_config) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid kafka configuration: {error_messages}" + logger.error(log_error_message) + raise ValueError(log_error_message) + + _max_batch_size = kafka_config.max_batch_size + bootstrap_servers = kafka_config.bootstrap_servers + input_topic = kafka_config.input_topic + group_id = kafka_config.group_id + client_id = None #module_config.client_id + poll_interval = kafka_config.poll_interval + disable_commit = kafka_config.disable_commit + disable_pre_filtering = kafka_config.disable_pre_filtering + auto_offset_reset = AutoOffsetReset(kafka_config.auto_offset_reset) + stop_after = kafka_config.stop_after + async_commits = kafka_config.async_commits + + if (input_topic is None): + input_topic = ["work_queue"] + + if isinstance(auto_offset_reset, AutoOffsetReset): + auto_offset_reset = auto_offset_reset.value + + if (bootstrap_servers == "auto"): + bootstrap_servers = auto_determine_bootstrap() + + _consumer_params = { + 'bootstrap.servers': bootstrap_servers, + 'group.id': group_id, + 'session.timeout.ms': "60000", + "auto.offset.reset": auto_offset_reset + } + if client_id is not None: + _consumer_params['client.id'] = client_id + + if isinstance(input_topic, str): + input_topic = [input_topic] + + # Remove duplicate topics if there are any. 
+ topics = list(set(input_topic)) + + # Flag to indicate whether or not we should stop + stop_requested = False + poll_interval = pd.Timedelta(poll_interval).total_seconds() + started = False + + records_emitted = 0 + num_messages = 0 + + def _process_batch(consumer, batch, records_emitted, num_messages): + message_meta = None + if len(batch): + buffer = StringIO() + + for msg in batch: + payload = msg.value() + if payload is not None: + buffer.write(payload.decode("utf-8")) + buffer.write("\n") + + df = None + try: + buffer.seek(0) + df = cudf.io.read_json(buffer, engine='cudf', lines=True, orient='records') + df['summary'] = "summary" + df['title'] = "title" + df['link'] = "link" + except Exception as e: + logger.error("Error parsing payload into a dataframe : %s", e) + finally: + if (not disable_commit): + for msg in batch: + consumer.commit(message=msg, asynchronous=async_commits) + + if df is not None: + num_records = len(df) + message_meta = MessageMeta(df) + records_emitted += num_records + num_messages += 1 + + if stop_after > 0 and records_emitted >= stop_after: + stop_requested = True + + batch.clear() + + return message_meta, records_emitted, num_messages + + + def _source_generator(records_emitted, num_messages): + + consumer = None + try: + consumer = ck.Consumer(_consumer_params) + consumer.subscribe(topics) + + batch = [] + + while not stop_requested: + do_process_batch = False + do_sleep = False + + msg = consumer.poll(timeout=1.0) + if msg is None: + do_process_batch = True + do_sleep = True + + else: + msg_error = msg.error() + if msg_error is None: + batch.append(msg) + if len(batch) == _max_batch_size: + do_process_batch = True + + elif msg_error == ck.KafkaError._PARTITION_EOF: + do_process_batch = True + do_sleep = True + else: + raise ck.KafkaException(msg_error) + + if do_process_batch: + message_meta, records_emitted, num_messages = _process_batch( + consumer, batch, records_emitted, num_messages) + if message_meta is not None: + yield message_meta + + if do_sleep and not stop_requested: + time.sleep(poll_interval) + + message_meta, records_emitted, num_messages = _process_batch( + consumer, batch, records_emitted, num_messages) + if message_meta is not None: + yield message_meta + + finally: + # Close the consumer and call on_completed + if (consumer): + consumer.close() + + # add node to the graph + source_generator = partial(_source_generator, records_emitted, num_messages) + source = builder.make_source('kafka_source', source_generator) + source.launch_options.pe_count = 1 + source.launch_options.engines_per_pe = os.cpu_count() + + # Register the output of the module + builder.register_module_output("output", source) + diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/kafka_source_pipe.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/kafka_source_pipe.py new file mode 100644 index 000000000..9aae12567 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/kafka_source_pipe.py @@ -0,0 +1,205 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import mrc +from pydantic import ValidationError + + +from morpheus.modules.general.monitor import MonitorLoaderFactory +from morpheus.modules.preprocess.deserialize import DeserializeLoaderFactory +from morpheus.modules.input.rss_source import RSSSourceLoaderFactory +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module + +from vdb_upload.module.kafka_source_module import KafkaSourceLoaderFactory +from vdb_upload.module.vdb_resource_tagging_module import VDBResourceTaggingLoaderFactory +from vdb_upload.module.web_scraper_module import WebScraperLoaderFactory +from vdb_upload.module.schema_transform import SchemaTransformLoaderFactory +from vdb_upload.schemas.rss_source_pipe_schema import RSSSourcePipeSchema +from vdb_upload.schemas.kafka_source_pipe_schema import KafkaSourcePipeSchema +from vdb_upload import module + + +logger = logging.getLogger(__name__) + +KafkaSourcePipeLoaderFactory = ModuleLoaderFactory("kafka_source_pipe", "morpheus_examples_llm", KafkaSourcePipeSchema) + + +@register_module("kafka_source_pipe", "morpheus_examples_llm") +def _kafka_source_pipe(builder: mrc.Builder, default_transform="raw_chunker"): + """ + Sets up a pipeline for processing kafka sources. + + This function configures a pipeline that subscribes to a kafka topic, processes received content + based on specified configurations, and outputs the processed data. It integrates modules for + kafka sourcing, content extraction, and schema transformation, along with monitoring + at various stages. + + Parameters + ---------- + builder : mrc.Builder + The Morpheus builder to which the pipeline modules will be added. + default_transform : str + The default extractor is not provided. + + Notes + ----- + The module configuration can include the following parameters: + + - **kafka_source_config**: Configuration for the kafka source module. + - **stage_config**: Source stage level configuration. + - **enable_monitor**: Boolean to enable monitoring. + - **namespace**: Name of namespace of stage modules. + - **module_id**: Name of source module. + - **module_output_id**: Name of output port of source module. + - **transform_type**: Name of module to transform data. + - **deserialize_config**: Deserialization module configurations. + - output_batch_size: Number of elements per batch emitted from source + - **kafka_config**: Kafka module configurations. + - **max_batch_size**: Number of kafka messages per batch emitted from kafka source module. + - **bootstrap_servers**: URL to a Kafka broker that can serve data. + - **input_topic**: Name of topic containing messages to process. + - **group_id**: Consumer group this worker/stage will belong to. + - **poll_interval**: How often to poll Kafka for new data (pandas format). + - **disable_commit**: Boolean to control possible arrival of duplicate messages. + - **disable_pre_filtering**: Boolean controling skipping committing messages as they are pulled off the server. + - **auto_offset_reset**: Decision to consume from the beginning of a topic partition or only new messages. 
+ - **stop_after**: Number of records before stopping ingestion of new messages. + - **async_commits**: Boolean to decided to asynchronously acknowledge consuming Kafka messages. + - **extractor_config**: Provide extractor specific parameters. + - **kwargs**: Keyword arguments used for extractor specific parameters. + - **vdb_config**: Vector Database parameters. + - **vdb_resource_name**: Name of the Milvus database collection to write vectors. + + The pipeline connects these modules in the following order: + Kafka Source -> Content Extractor -> Schema Transform -> Deserialize, + with monitoring at each stage. The content extraction method is determined + by pipeline configuration. + """ + + # Load and validate the module configuration from the builder + module_config = builder.get_current_module_config() + kafka_source_config = module_config.get("kafka_config", {}) + + try: + validated_config = KafkaSourcePipeSchema(**kafka_source_config) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid Kafka source configuration: {error_messages}" + logger.error(log_error_message) + raise ValueError(log_error_message) + + enable_monitor = validated_config.stage_config.enable_monitor + + kafka_source_loader = KafkaSourceLoaderFactory.get_instance( + "kafka_source", validated_config.kafka_config) + + transform_type = validated_config.stage_config.transform_type + transform_config_key = f"{transform_type}_config" + + if not hasattr(module, transform_type): + transform_type = default_transform + + transform_loader = getattr(module, transform_type).get_instance( + transform_type, {transform_config_key: getattr(validated_config, transform_config_key)}) + + transform_config = { + "schema_transform_config": { + "summary": { + "dtype": "str", "op_type": "select" + }, + "title": { + "dtype": "str", "op_type": "select" + }, + "content": { + "from": "page_content", "dtype": "str", "op_type": "rename" + }, + "source": { + "from": "payload", "dtype": "str", "op_type": "rename" + } + } + } + + schema_transform_loader = SchemaTransformLoaderFactory.get_instance("schema_transform", transform_config) + + deserialize_loader = DeserializeLoaderFactory.get_instance( + "deserialize", { + "batch_size": validated_config.deserialize_config.output_batch_size, "message_type": "ControlMessage" + }) + + vdb_resource_tagging_loader = VDBResourceTaggingLoaderFactory.get_instance( + "vdb_resource_tagging", {"vdb_resource_name": validated_config.vdb_config.vdb_resource_name}) + + monitor_0_loader = MonitorLoaderFactory.get_instance( + "monitor_0", { + "description": f"KafkaSourcePipe Kafka Source ({transform_type})", + "unit": "messages", + "silence_monitors": not enable_monitor + }) + + monitor_1_loader = MonitorLoaderFactory.get_instance( + "monitor_1", { + "description": f"KafkaSourcePipe Transform ({transform_type})", + "unit": "chunks", + "silence_monitors": not enable_monitor + }) + + monitor_2_loader = MonitorLoaderFactory.get_instance( + "monitor_2", { + "description": f"KafkaSourcePipe Schema Transform ({transform_type})", + "unit": "chunks", + "silence_monitors": not enable_monitor + }) + + monitor_3_loader = MonitorLoaderFactory.get_instance( + "monitor_3", { + "description": f"KafkaSourcePipe Deserialize ({transform_type})", + "unit": "chunks", + "silence_monitors": not enable_monitor + }) + + monitor_4_loader = MonitorLoaderFactory.get_instance( + "monitor_4", { + "description": f"KafkaSourcePipe VDB Tagger 
({transform_type})", + "unit": "chunks", + "silence_monitors": not enable_monitor + }) + + # Load modules + kafka_source_module = kafka_source_loader.load(builder=builder) + monitor_0_module = monitor_0_loader.load(builder=builder) + transform_loader_module = transform_loader.load(builder=builder) + monitor_1_module = monitor_1_loader.load(builder=builder) + schema_transform_module = schema_transform_loader.load(builder=builder) + monitor_2_module = monitor_2_loader.load(builder=builder) + deserialize_module = deserialize_loader.load(builder=builder) + monitor_3_module = monitor_3_loader.load(builder=builder) + vdb_resource_tagging_module = vdb_resource_tagging_loader.load(builder=builder) + monitor_4_module = monitor_4_loader.load(builder=builder) + + # Connect the modules: Kafka source -> Monitor + builder.make_edge(kafka_source_module.output_port("output"), monitor_0_module.input_port("input")) + builder.make_edge(monitor_0_module.output_port("output"), transform_loader_module.input_port("input")) + builder.make_edge(transform_loader_module.output_port("output"), monitor_1_module.input_port("input")) + builder.make_edge(monitor_1_module.output_port("output"), schema_transform_module.input_port("input")) + builder.make_edge(schema_transform_module.output_port("output"), monitor_2_module.input_port("input")) + builder.make_edge(monitor_2_module.output_port("output"), deserialize_module.input_port("input")) + builder.make_edge(deserialize_module.output_port("output"), monitor_3_module.input_port("input")) + builder.make_edge(monitor_3_module.output_port("output"), vdb_resource_tagging_module.input_port("input")) + builder.make_edge(vdb_resource_tagging_module.output_port("output"), monitor_4_module.input_port("input")) + + # Register the final output of the transformation module + builder.register_module_output("output", monitor_4_module.output_port("output")) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/raw_chunker_module.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/raw_chunker_module.py new file mode 100644 index 000000000..8de1f66eb --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/raw_chunker_module.py @@ -0,0 +1,100 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
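+
+"""
+Raw chunker module.
+
+Splits the text found in the configured payload column into overlapping
+chunks with RecursiveCharacterTextSplitter and emits one row per chunk in
+a ``page_content`` column.
+"""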
+ +import logging +import os + +from functools import partial + +import mrc +import mrc.core.operators as ops +import pandas as pd +from langchain.text_splitter import RecursiveCharacterTextSplitter +from pydantic import ValidationError + +import cudf + +from morpheus.messages import MessageMeta +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module +from vdb_upload.schemas.raw_chunker_schema import RawChunkerSchema + +logger = logging.getLogger(__name__) + + +RawChunkerLoaderFactory = ModuleLoaderFactory("raw_chunk", "morpheus_examples_llm", RawChunkerSchema) + + +def splitter(msg: MessageMeta, text_splitter, payload_column) -> MessageMeta: + + """ + Applies chunking strategy to raw text document. + Assumes each document has been preprocessed. + """ + + if (payload_column not in msg.get_column_names()): + return None + + df = msg.copy_dataframe() + + if isinstance(df, cudf.DataFrame): + df: pd.DataFrame = df.to_pandas() + + # Convert the dataframe into a list of dictionaries + df_dicts = df.to_dict(orient="records") + + final_rows: list[dict] = [] + + for row in df_dicts: + + split_text = text_splitter.split_text( + row[payload_column]) + + row["payload"] = "raw_source" + + for text in split_text: + row_cp = row.copy() + row_cp.update({"page_content": text}) + final_rows.append(row_cp) + + return MessageMeta(df=cudf.DataFrame(final_rows)) + + +@register_module("raw_chunk", "morpheus_examples_llm") +def _raw_chunker(builder: mrc.Builder): + module_config = builder.get_current_module_config() + + # Validate the module configuration using the contract + try: + raw_chunk_config = RawChunkerSchema(**module_config.get("raw_chunker_config", {})) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid raw chunker configuration: {error_messages}" + logger.error(log_error_message) + raise ValueError(log_error_message) + + payload_column = raw_chunk_config.payload_column + chunk_size = raw_chunk_config.chunk_size + + text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, + chunk_overlap=chunk_size // 10, + length_function=len) + + + op_func = partial(splitter, text_splitter=text_splitter, payload_column=payload_column) + + node = builder.make_node("raw_chunker", ops.map(op_func), ops.filter(lambda x: x is not None)) + + builder.register_module_input("input", node) + builder.register_module_output("output", node) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/rss_source_pipe.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/rss_source_pipe.py new file mode 100644 index 000000000..7f1475d3c --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/rss_source_pipe.py @@ -0,0 +1,170 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
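+
+"""
+RSS source pipe module.
+
+Wires together the RSS source, web scraper, schema transform, deserialize,
+and VDB resource tagging modules (with monitors between stages) into a
+single reusable source pipeline.
+"""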
+ +import logging + +import mrc +from pydantic import ValidationError +from vdb_upload.module.schema_transform import SchemaTransformLoaderFactory +from vdb_upload.schemas.rss_source_pipe_schema import RSSSourcePipeSchema + +from morpheus.modules.general.monitor import MonitorLoaderFactory +from morpheus.modules.input.rss_source import RSSSourceLoaderFactory +from morpheus.modules.preprocess.deserialize import DeserializeLoaderFactory +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module + +from vdb_upload.module.vdb_resource_tagging_module import VDBResourceTaggingLoaderFactory +from vdb_upload.module.web_scraper_module import WebScraperLoaderFactory + +logger = logging.getLogger(__name__) + +RSSSourcePipeLoaderFactory = ModuleLoaderFactory("rss_source_pipe", "morpheus_examples_llm", RSSSourcePipeSchema) + + +@register_module("rss_source_pipe", "morpheus_examples_llm") +def _rss_source_pipe(builder: mrc.Builder): + """ + Creates a pipeline for processing RSS feeds. + + This function sets up a pipeline that takes RSS feed data, scrapes web content + based on the feed, and then outputs the scraped data. It integrates modules like RSS source, + web scraper, and deserializer, along with monitoring for each stage. + + Parameters + ---------- + builder : mrc.Builder + The Morpheus builder to which the pipeline modules will be added. + + Notes + ----- + The module configuration can include the following parameters: + + - **rss_config**: Configuration for the RSS source module. + - **batch_size**: Number of RSS feed items to process in each batch. + - **cache_dir**: Directory for caching RSS feed data. + - **cooldown_interval_sec**: Cooldown interval in seconds between fetches. + - **enable_cache**: Boolean to enable caching of feed data. + - **enable_monitor**: Boolean to enable monitoring for this module. + - **feed_input**: List of RSS feed URLs to process. + - **interval_sec**: Interval in seconds for fetching new feed items. + - **request_timeout_sec**: Timeout in seconds for RSS feed requests. + - **run_indefinitely**: Boolean to indicate continuous running. + - **stop_after**: Number of records to process before stopping (0 for indefinite). + - **web_scraper_config**: Configuration for the web scraper module. + - **chunk_overlap**: Overlap size for chunks in web scraping. + - **chunk_size**: Size of content chunks for processing. + - **enable_cache**: Boolean to enable caching of scraped data. + + The pipeline connects these modules in the following order: + RSS Source -> Web Scraper -> Deserializer, with monitoring at each stage. 
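+
+    Examples
+    --------
+    An illustrative ``rss_config`` (URLs and values are placeholders; see
+    ``RSSSourcePipeSchema`` for the full set of accepted fields)::
+
+        {
+            "feed_input": ["https://example.com/rss.xml"],
+            "batch_size": 32,
+            "run_indefinitely": True,
+            "enable_cache": False,
+            "enable_monitor": True,
+            "output_batch_size": 2048,
+            "vdb_resource_name": "vdb_rss",
+            "web_scraper_config": {
+                "chunk_size": 512,
+                "chunk_overlap": 51,
+                "enable_cache": False,
+            },
+        }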
+ """ + + # Load and validate the module configuration from the builder + module_config = builder.get_current_module_config() + rss_config = module_config.get("rss_config", {}) + try: + validated_config = RSSSourcePipeSchema(**rss_config) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid RSS source configuration: {error_messages}" + logger.error(log_error_message) + raise ValueError(log_error_message) + + enable_monitor = validated_config.enable_monitor + + rss_source_config = { + "feed_input": validated_config.feed_input, + "run_indefinitely": validated_config.run_indefinitely, + "batch_size": validated_config.batch_size, + "enable_cache": validated_config.enable_cache, + "cache_dir": validated_config.cache_dir, + "cooldown_interval_sec": validated_config.cooldown_interval_sec, + "request_timeout_sec": validated_config.request_timeout_sec, + "interval_sec": validated_config.interval_sec, + "stop_after_sec": validated_config.stop_after_sec, + } + rss_source_loader = RSSSourceLoaderFactory.get_instance("rss_source", {"rss_source": rss_source_config}) + + web_scraper_loader = WebScraperLoaderFactory.get_instance( + "web_scraper", { + "web_scraper_config": validated_config.web_scraper_config, + }) + + transform_config = { + "schema_transform_config": { + "summary": { + "dtype": "str", "op_type": "select" + }, + "title": { + "dtype": "str", "op_type": "select" + }, + "content": { + "from": "page_content", "dtype": "str", "op_type": "rename" + }, + "source": { + "from": "link", "dtype": "str", "op_type": "rename" + } + } + } + schema_transform_loader = SchemaTransformLoaderFactory.get_instance("schema_transform", transform_config) + + deserialize_loader = DeserializeLoaderFactory.get_instance( + "deserialize", { + "batch_size": validated_config.output_batch_size, "message_type": "ControlMessage" + }) + + vdb_resource_tagging_loader = VDBResourceTaggingLoaderFactory.get_instance( + "vdb_resource_tagging", {"vdb_resource_name": validated_config.vdb_resource_name}) + + monitor_0_loader = MonitorLoaderFactory.get_instance( + "monitor_m1", { + "description": "RSSSourcePipe RSS Source", "silence_monitors": not enable_monitor + }) + monitor_1_loader = MonitorLoaderFactory.get_instance( + "monitor_0", { + "description": "RSSSourcePipe Web Scraper", "silence_monitors": not enable_monitor + }) + monitor_2_loader = MonitorLoaderFactory.get_instance( + "monitor_1", { + "description": "RSSSourcePipe Transform", "silence_monitors": not enable_monitor + }) + monitor_3_loader = MonitorLoaderFactory.get_instance( + "monitor_2", { + "description": "RSSSourcePipe Deserialize", "silence_monitors": not enable_monitor + }) + + # Load modules + rss_source_module = rss_source_loader.load(builder=builder) + monitor_0_loader = monitor_0_loader.load(builder=builder) + web_scraper_module = web_scraper_loader.load(builder=builder) + monitor_0_module = monitor_1_loader.load(builder=builder) + transform_module = schema_transform_loader.load(builder=builder) + monitor_1_module = monitor_2_loader.load(builder=builder) + deserialize_module = deserialize_loader.load(builder=builder) + vdb_resource_tagging_module = vdb_resource_tagging_loader.load(builder=builder) + monitor_2_module = monitor_3_loader.load(builder=builder) + + # Connect the modules: RSS source -> Web scraper -> Schema transform + builder.make_edge(rss_source_module.output_port("output"), monitor_0_loader.input_port("input")) + 
builder.make_edge(monitor_0_loader.output_port("output"), web_scraper_module.input_port("input")) + builder.make_edge(web_scraper_module.output_port("output"), monitor_0_module.input_port("input")) + builder.make_edge(monitor_0_module.output_port("output"), transform_module.input_port("input")) + builder.make_edge(transform_module.output_port("output"), monitor_1_module.input_port("input")) + builder.make_edge(monitor_1_module.output_port("output"), deserialize_module.input_port("input")) + builder.make_edge(deserialize_module.output_port("output"), vdb_resource_tagging_module.input_port("input")) + builder.make_edge(vdb_resource_tagging_module.output_port("output"), monitor_2_module.input_port("input")) + + # Register the final output of the transformation module + builder.register_module_output("output", monitor_2_module.output_port("output")) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/schema_transform.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/schema_transform.py new file mode 100644 index 000000000..fd5775698 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/schema_transform.py @@ -0,0 +1,142 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import Any +from typing import Dict +from typing import Optional + +import mrc +import mrc.core.operators as ops +from pydantic import BaseModel +from pydantic import Field +from pydantic import ValidationError + +import cudf + +from morpheus.messages import MessageMeta +from morpheus.utils.column_info import ColumnInfo +from morpheus.utils.column_info import DataFrameInputSchema +from morpheus.utils.column_info import RenameColumn +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module + +logger = logging.getLogger(__name__) + + +class ColumnTransformSchema(BaseModel): + dtype: str + op_type: str + from_: Optional[str] = Field(None, alias="from") + + class Config: + extra = "forbid" + + +class SchemaTransformSchema(BaseModel): + schema_transform_config: Dict[str, Dict[str, Any]] = Field(default_factory=dict) + + class Config: + extra = "forbid" + + +SchemaTransformLoaderFactory = ModuleLoaderFactory("schema_transform", "morpheus_examples_llm", SchemaTransformSchema) + + +@register_module("schema_transform", "morpheus_examples_llm") +def _schema_transform(builder: mrc.Builder): + """ + A module for applying simple DataFrame schema transform policies. + + This module reads the configuration to determine how to set data types for columns, select, or rename them in the + dataframe. + + Parameters + ---------- + builder : mrc.Builder + The Morpheus pipeline builder object. + + Notes + ------------- + The configuration should be passed to the module through the `module_config` attribute of the builder. 
It should + contain a dictionary where each key is a column name, and the value is another dictionary with keys 'dtype' for + data type, 'op_type' for operation type ('select' or 'rename'), and optionally 'from' for the original column + name (if the column is to be renamed). + + Example Configuration + --------------------- + { + "summary": {"dtype": "str", "op_type": "select"}, + "title": {"dtype": "str", "op_type": "select"}, + "content": {"from": "page_content", "dtype": "str", "op_type": "rename"}, + "source": {"from": "link", "dtype": "str", "op_type": "rename"} + } + """ + + module_config = builder.get_current_module_config() + + # Validate the module configuration using the contract + try: + validated_config = SchemaTransformSchema(**module_config) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid schema transform configuration: {error_messages}" + logger.error(log_error_message) + + raise + + schema_config = validated_config.schema_transform_config + + source_column_info = [] + preserve_columns = [] + + for col_name, col_config in schema_config.items(): + op_type = col_config.get("op_type") + if (op_type == "rename"): + # Handling renamed columns + source_column_info.append( + RenameColumn(name=col_name, dtype=col_config["dtype"], input_name=col_config["from"])) + elif (op_type == "select"): + # Handling regular columns + source_column_info.append(ColumnInfo(name=col_name, dtype=col_config["dtype"])) + else: + raise ValueError(f"Unknown op_type '{op_type}' for column '{col_name}'") + + preserve_columns.append(col_name) + + source_schema = DataFrameInputSchema(column_info=source_column_info) + + def do_transform(message: MessageMeta): + if (message is None): + return None + + with message.mutable_dataframe() as mdf: + if (len(mdf) == 0): + return None + + for col_info in source_schema.column_info: + try: + mdf[col_info.name] = col_info._process_column(mdf) + except Exception as exc_info: + logger.exception("Failed to process column '%s'. Dataframe: \n%s\n%s", col_info.name, mdf, exc_info) + return None + + mdf = mdf[preserve_columns] + + return MessageMeta(df=cudf.DataFrame(mdf)) + + node = builder.make_node("schema_transform", ops.map(do_transform), ops.filter(lambda x: x is not None)) + + builder.register_module_input("input", node) + builder.register_module_output("output", node) \ No newline at end of file diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/vdb_resource_tagging_module.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/vdb_resource_tagging_module.py new file mode 100644 index 000000000..9b4e42727 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/vdb_resource_tagging_module.py @@ -0,0 +1,60 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
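As a quick illustration of the `schema_transform` policy defined above (it is only a column select/rename pass), the following minimal sketch applies the same configuration with pandas. It is illustrative only: the module itself operates on cuDF DataFrames carried in a `MessageMeta`, and the input rows here are hypothetical.

```python
import pandas as pd

# Hypothetical scraped rows, shaped like the web scraper's output.
df = pd.DataFrame({
    "title": ["Example CVE write-up"],
    "summary": ["Short abstract"],
    "link": ["https://example.com/post"],
    "page_content": ["chunk of scraped text"],
})

# Mirrors the transform_config used by the RSS source pipe:
# 'select' keeps a column as-is, 'rename' copies it from the 'from' column.
config = {
    "summary": {"dtype": "str", "op_type": "select"},
    "title": {"dtype": "str", "op_type": "select"},
    "content": {"from": "page_content", "dtype": "str", "op_type": "rename"},
    "source": {"from": "link", "dtype": "str", "op_type": "rename"},
}

out = pd.DataFrame()
for col, spec in config.items():
    src = spec.get("from", col)
    out[col] = df[src].astype(spec["dtype"])

print(out.columns.tolist())  # ['summary', 'title', 'content', 'source']
```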
+
+import logging
+
+import mrc
+from pydantic import BaseModel
+from pydantic import ValidationError
+
+from morpheus.messages import ControlMessage
+from morpheus.utils.module_utils import ModuleLoaderFactory
+from morpheus.utils.module_utils import register_module
+
+logger = logging.getLogger(__name__)
+
+
+class VDBResourceTaggingSchema(BaseModel):
+    vdb_resource_name: str
+
+    class Config:
+        extra = "forbid"
+
+
+VDBResourceTaggingLoaderFactory = ModuleLoaderFactory("vdb_resource_tagging",
+                                                      "morpheus_examples_llm",
+                                                      VDBResourceTaggingSchema)
+
+
+@register_module("vdb_resource_tagging", "morpheus_examples_llm")
+def _vdb_resource_tagging(builder: mrc.Builder):
+    module_config = builder.get_current_module_config()
+    try:
+        validated_config = VDBResourceTaggingSchema(**module_config)
+    except ValidationError as e:
+        error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()])
+        log_error_message = f"Invalid VDB resource tagging configuration: {error_messages}"
+        logger.error(log_error_message)
+
+        raise
+
+    def on_data(data: ControlMessage):
+        data.set_metadata("vdb_resource", validated_config.vdb_resource_name)
+
+        return data
+
+    node = builder.make_node("vdb_resource_tagging", on_data)
+
+    builder.register_module_input("input", node)
+    builder.register_module_output("output", node)
\ No newline at end of file
diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/web_scraper_module.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/web_scraper_module.py
new file mode 100644
index 000000000..5a3af2c7e
--- /dev/null
+++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/module/web_scraper_module.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
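The `vdb_resource_tagging` module above does one thing: it validates a single `vdb_resource_name` field and stamps it onto each `ControlMessage` as `vdb_resource` metadata so downstream vector-database writes can target the right resource. A standalone sketch of just the validation contract (pydantic v1 semantics assumed, matching the rest of this change):

```python
from pydantic import BaseModel, ValidationError


class VDBResourceTaggingSchema(BaseModel):
    vdb_resource_name: str

    class Config:
        extra = "forbid"


# A valid configuration, e.g. as emitted by the RSS source pipe.
print(VDBResourceTaggingSchema(vdb_resource_name="vdb_rss"))

# Unknown keys fail fast because extra = "forbid".
try:
    VDBResourceTaggingSchema(vdb_resource_name="vdb_rss", typo_key=1)
except ValidationError as e:
    print(e.errors()[0]["msg"])  # 'extra fields not permitted' under pydantic v1
```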
+ +import logging +import os +from functools import partial + +import mrc +import mrc.core.operators as ops +import requests +import requests_cache +from bs4 import BeautifulSoup +import lxml +from langchain.text_splitter import RecursiveCharacterTextSplitter +from pydantic import BaseModel # pylint: disable=no-name-in-module +from pydantic import ValidationError + +import cudf + +from morpheus.messages import MessageMeta +from morpheus.utils.module_utils import ModuleLoaderFactory +from morpheus.utils.module_utils import register_module + +logger = logging.getLogger(__name__) + + +class WebScraperSchema(BaseModel): + link_column: str = "link" + chunk_size: int = 512 + chunk_overlap: int = 51 + enable_cache: bool = False + cache_path: str = "./.cache/http/RSSDownloadStage.sqlite" + cache_dir: str = "./.cache/llm/rss" + + class Config: + extra = "forbid" + + +WebScraperLoaderFactory = ModuleLoaderFactory("web_scraper", "morpheus_examples_llm", WebScraperSchema) + + +def download_and_split(msg: MessageMeta, text_splitter, link_column, session) -> MessageMeta: + """ + Uses the HTTP GET method to download/scrape the links found in the message, splits the scraped data, and stores + it in the output, excludes output for any links which produce an error. + """ + if (link_column not in msg.get_column_names()): + return None + + df = msg.copy_dataframe() + + # Convert the dataframe into a list of dictionaries + df_dicts = df.to_dict(orient="records") + + final_rows: list[dict] = [] + + for row in df_dicts: + url = row[link_column] + + try: + # Try to get the page content + response = session.get(url) + + if (not response.ok): + logger.warning("Error downloading document from URL '%s'. " + "Returned code: %s. With reason: '%s'", + url, + response.status_code, + response.reason) + continue + + raw_html = response.text + soup = BeautifulSoup(raw_html, "lxml") + + text = soup.get_text(strip=True, separator=' ') + split_text = text_splitter.split_text(text) + + for text in split_text: + row_cp = row.copy() + row_cp.update({"page_content": text}) + final_rows.append(row_cp) + + if isinstance(response, requests_cache.models.response.CachedResponse): + logger.debug("Processed cached page: '%s'", url) + else: + logger.debug("Processed page: '%s'", url) + + except ValueError as exc: + logger.error("Error parsing document: %s", exc) + continue + except Exception as exc: + logger.error("Error downloading document from URL '%s'. 
Error: %s", url, exc) + continue + + return MessageMeta(df=cudf.DataFrame(final_rows)) + + +@register_module("web_scraper", "morpheus_examples_llm") +def _web_scraper(builder: mrc.Builder): + module_config = builder.get_current_module_config() + + # Validate the module configuration using the contract + try: + web_scraper_config = WebScraperSchema(**module_config.get("web_scraper_config", {})) + except ValidationError as e: + error_messages = '; '.join([f"{error['loc'][0]}: {error['msg']}" for error in e.errors()]) + log_error_message = f"Invalid web scraper configuration: {error_messages}" + logger.error(log_error_message) + + raise + + link_column = web_scraper_config.link_column + chunk_size = web_scraper_config.chunk_size + enable_cache = web_scraper_config.enable_cache + cache_path = web_scraper_config.cache_path + cache_dir = web_scraper_config.cache_dir + + if (enable_cache): + os.makedirs(cache_dir, exist_ok=True) + + text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, + chunk_overlap=chunk_size // 10, + length_function=len) + + if (enable_cache): + session = requests_cache.CachedSession(cache_path, backend='sqlite') + else: + session = requests.Session() + + session.headers.update({ + "User-Agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36" + }) + + op_func = partial(download_and_split, text_splitter=text_splitter, link_column=link_column, session=session) + + node = builder.make_node("web_scraper", ops.map(op_func), ops.filter(lambda x: x is not None)) + node.launch_options.pe_count = 1 + node.launch_options.engines_per_pe = os.cpu_count() + + builder.register_module_input("input", node) + builder.register_module_output("output", node) \ No newline at end of file diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/pipeline.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/pipeline.py new file mode 100644 index 000000000..d8d36a042 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/pipeline.py @@ -0,0 +1,102 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
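Two details of the web scraper above are easy to miss: HTTP caching is handled at the session level with `requests_cache` rather than inside the scrape logic, and the splitter overlap is always derived as `chunk_size // 10`, so the `chunk_overlap` field in `WebScraperSchema` is not actually consumed as written. A small sketch of the splitting step, assuming `langchain` is installed and using placeholder text:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size = 512
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                          chunk_overlap=chunk_size // 10,  # 51 characters of overlap
                                          length_function=len)

# Placeholder for a scraped page's text.
text = "word " * 400
chunks = splitter.split_text(text)

print(len(chunks))                                 # several chunks
print(all(len(c) <= chunk_size for c in chunks))   # True
```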
+ +import logging +import time +import typing + +from morpheus.config import Config +from morpheus.pipeline.pipeline import Pipeline +from morpheus.stages.general.monitor_stage import MonitorStage +from morpheus.stages.general.trigger_stage import TriggerStage +from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage +from morpheus.stages.output.write_to_vector_db_stage import WriteToVectorDBStage +from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage + +from vdb_upload.helper import process_vdb_sources + +logger = logging.getLogger(__name__) + + +def pipeline(pipeline_config: Config, + source_config: typing.List, + vdb_config: typing.Dict, + embeddings_config: typing.Dict, + tokenizer_config: typing.Dict) -> float: + """ + Sets up and runs a data processing pipeline based on provided configurations. + + Parameters + ---------- + source_config : Dict + Configuration for data sources (e.g., 'rss', 'filesystem'). + vdb_config : Dict + Configuration for the vector database. + pipeline_config : Dict + General configuration for the pipeline (e.g., number of threads, batch sizes). + embeddings_config : Dict + Configuration for embeddings (e.g., model name, embedding size). + + Returns + ------- + float + The start time of the pipeline execution. + """ + + isolate_embeddings = embeddings_config.get('isolate_embeddings', False) + + pipe = Pipeline(pipeline_config) + + vdb_sources = process_vdb_sources(pipe, pipeline_config, source_config) + + trigger = None + if (isolate_embeddings): + trigger = pipe.add_stage(TriggerStage(pipeline_config)) + + nlp_stage = pipe.add_stage(PreprocessNLPStage(pipeline_config, **tokenizer_config.get("model_kwargs", {}))) + + monitor_1 = pipe.add_stage( + MonitorStage(pipeline_config, description="Tokenize rate", unit='chunks', delayed_start=False)) + + embedding_stage = pipe.add_stage(TritonInferenceStage(pipeline_config, **embeddings_config.get('model_kwargs', {}))) + + monitor_2 = pipe.add_stage( + MonitorStage(pipeline_config, description="Inference rate", unit="embeddings", delayed_start=False)) + + vector_db = pipe.add_stage(WriteToVectorDBStage(pipeline_config, **vdb_config)) + + monitor_3 = pipe.add_stage( + MonitorStage(pipeline_config, description="Upload rate", unit="embeddings", delayed_start=False)) + + # Connect the pipeline + for source_output in vdb_sources: + if (isolate_embeddings): + pipe.add_edge(source_output, trigger) + else: + pipe.add_edge(source_output, nlp_stage) + + if (isolate_embeddings): + pipe.add_edge(trigger, nlp_stage) + + pipe.add_edge(nlp_stage, monitor_1) + pipe.add_edge(monitor_1, embedding_stage) + pipe.add_edge(embedding_stage, monitor_2) + pipe.add_edge(monitor_2, vector_db) + pipe.add_edge(vector_db, monitor_3) + + start_time = time.time() + + pipe.run() + + return start_time diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/run.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/run.py new file mode 100644 index 000000000..cff89dc18 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/run.py @@ -0,0 +1,230 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os + +import click +from vdb_upload.vdb_utils import build_cli_configs +from vdb_upload.vdb_utils import build_final_config +from vdb_upload.vdb_utils import is_valid_service + +logger = logging.getLogger(__name__) + + +@click.group(name=__name__) +def run(): + pass + + +@run.command() +@click.option( + "--content_chunking_size", + default=512, # Set a sensible default value + type=click.IntRange(min=1), # Ensure that only positive integers are valid + help="The size of content chunks for processing.") +@click.option( + "--embedding_size", + default=384, + type=click.IntRange(min=1), + help="Output size of the embedding model", +) +@click.option( + "--enable_cache", + is_flag=True, + default=False, + help="Enable caching of RSS feed request data.", +) +@click.option("--enable_monitors", is_flag=True, default=False, help="Enable or disable monitor functionality.") +@click.option('--file_source', multiple=True, default=[], type=str, help='List of file sources/paths to be processed.') +@click.option('--feed_inputs', multiple=True, default=[], type=str, help='List of RSS source feeds to process.') +@click.option( + "--interval_secs", + default=600, + type=click.IntRange(min=1), + help="Interval in seconds between fetching new feed items.", +) +@click.option("--isolate_embeddings", + is_flag=True, + default=False, + help="Whether to fetch all data prior to executing the rest of the pipeline.") +@click.option( + "--model_fea_length", + default=512, + type=click.IntRange(min=1), + help="Features length to use for the model", +) +@click.option( + "--model_max_batch_size", + default=64, + type=click.IntRange(min=1), + help="Max batch size to use for the model", +) +@click.option( + "--embedding_model_name", + required=True, + default='all-MiniLM-L6-v2', + help="The name of the model that is deployed on Triton server", +) +@click.option( + "--num_threads", + default=os.cpu_count(), + type=click.IntRange(min=1), + help="Number of internal pipeline threads to use", +) +@click.option( + "--pipeline_batch_size", + default=8192, + type=click.IntRange(min=1), + help=("Internal batch size for the pipeline. Can be much larger than the model batch size. " + "Also used for Kafka consumers"), +) +@click.option( + "--run_indefinitely", + is_flag=True, + default=False, + help="Indicates whether the process should run continuously.", +) +@click.option( + "--rss_request_timeout_sec", + default=2.0, # Set a default value, adjust as needed + type=click.FloatRange(min=0.0), # Ensure that only non-negative floats are valid + help="Timeout in seconds for RSS feed requests.") +@click.option("--source_type", + multiple=True, + type=click.Choice(['rss', 'filesystem'], case_sensitive=False), + default=[], + show_default=True, + help="The type of source to use. Can specify multiple times for different source types.") +@click.option( + "--stop_after", + default=0, + type=click.IntRange(min=0), + help="Stop after emitting this many records from the RSS source stage. Useful for testing. 
Disabled if `0`", +) +@click.option( + "--triton_server_url", + type=str, + default="triton:8001", + help="Triton server URL.", +) +@click.option( + "--vdb_config_path", + type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True), + default=None, + help="Path to a YAML configuration file.", +) +@click.option( + "--vector_db_resource_name", + type=str, + default="VDBUploadExample", + help="The identifier of the resource on which operations are to be performed in the vector database.", +) +@click.option( + "--vector_db_service", + type=str, + default="milvus", + callback=is_valid_service, + help="Name of the vector database service to use.", +) +@click.option( + "--vector_db_uri", + type=str, + default="http://milvus:19530", + help="URI for connecting to Vector Database server.", +) +def pipeline(**kwargs): + """ + Configure and run the data processing pipeline based on the specified command-line options. + + This function initializes and runs the data processing pipeline using configurations provided + via command-line options. It supports customization for various components of the pipeline such as + source type, embedding model, and vector database parameters. + + Parameters + ---------- + **kwargs : dict + Keyword arguments containing command-line options. + + Returns + ------- + The result of the internal pipeline function call. + """ + vdb_config_path = kwargs.pop('vdb_config_path', None) + cli_source_conf, cli_embed_conf, cli_pipe_conf, cli_tok_conf, cli_vdb_conf = build_cli_configs(**kwargs) + final_config = build_final_config(vdb_config_path, + cli_source_conf, + cli_embed_conf, + cli_pipe_conf, + cli_tok_conf, + cli_vdb_conf) + + # Call the internal pipeline function with the final config dictionary + from .pipeline import pipeline as _pipeline + return _pipeline(**final_config) + + +@run.command() +@click.option( + "--model_name", + required=True, + default='all-MiniLM-L6-v2', + help="The name of the model that is deployed on Triton server", +) +@click.option( + "--save_cache", + default=None, + type=click.Path(file_okay=True, dir_okay=False), + help="Location to save the cache to", +) +def langchain(**kwargs): + from .langchain import chain + + return chain(**kwargs) + + +@run.command() +@click.option( + "--model_name", + required=True, + default='all-MiniLM-L6-v2', + help="The name of the model that is deployed on Triton server", +) +@click.option( + "--model_seq_length", + default=512, + type=click.IntRange(min=1), + help="Accepted input size of the text tokens", +) +@click.option( + "--max_batch_size", + default=256, + type=click.IntRange(min=1), + help="Max batch size for the model config", +) +@click.option( + "--triton_repo", + required=True, + type=click.Path(file_okay=False, dir_okay=True), + help="Directory of the Triton Model Repo where the model will be saved", +) +@click.option( + "--output_model_name", + default=None, + help="Overrides the model name that is used in triton. 
Defaults to `model_name`", +) +def export_triton_model(**kwargs): + from .export_model import build_triton_model + + return build_triton_model(**kwargs) diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/__init__.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/__init__.py new file mode 100644 index 000000000..57bbf5c62 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/file_source_pipe_schema.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/file_source_pipe_schema.py new file mode 100644 index 000000000..9a45e5b02 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/file_source_pipe_schema.py @@ -0,0 +1,41 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
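Putting the options above together, an invocation might look like the sketch below. This is illustrative only: the exact entry point depends on how the example container launches `run.py`, the paths are placeholders, and the Triton/Milvus endpoints shown are just the defaults assumed elsewhere in this change.

```bash
# Drive the pipeline from CLI flags alone.
python run.py pipeline \
  --source_type rss \
  --embedding_model_name all-MiniLM-L6-v2 \
  --embedding_size 384 \
  --triton_server_url triton:8001 \
  --vector_db_service milvus \
  --vector_db_uri http://milvus:19530 \
  --vector_db_resource_name VDBUploadExample \
  --enable_monitors

# Or drive everything from the YAML file (adjust the path as appropriate).
python run.py pipeline --vdb_config_path vdb_upload/vdb_config.yaml
```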
+ +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from pydantic import BaseModel +from pydantic import Field + +logger = logging.getLogger(__name__) + + +class FileSourcePipeSchema(BaseModel): + batch_size: int = 1024 + chunk_overlap: int = 51 + chunk_size: int = 512 + converters_meta: Optional[Dict[Any, Any]] = {} # Flexible dictionary for converters metadata + enable_monitor: bool = False + extractor_config: Optional[Dict[Any, Any]] = {} # Flexible dictionary for extractor configuration + filenames: List[str] = Field(default_factory=list) # List of file paths + num_threads: int = 1 # Number of threads for processing + vdb_resource_name: str + watch: bool = False # Flag to watch file changes + watch_interval: float = -5.0 # Interval to watch file changes + + class Config: + extra = "forbid" diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/kafka_source_pipe_schema.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/kafka_source_pipe_schema.py new file mode 100644 index 000000000..97a1dde44 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/kafka_source_pipe_schema.py @@ -0,0 +1,91 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
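As a sanity check of the contract above, a `filesystem` source entry (mirroring `filesystem_pdf_source` from `vdb_config.yaml`) validates directly against `FileSourcePipeSchema`; only `vdb_resource_name` is required, and everything else falls back to the defaults shown. A minimal sketch, assuming the `vdb_upload` package is importable as it is in `run.py`:

```python
from vdb_upload.schemas.file_source_pipe_schema import FileSourcePipeSchema

# Mirrors the "filesystem_pdf_source" entry from vdb_config.yaml.
config = {
    "batch_size": 1024,
    "enable_monitor": True,
    "extractor_config": {"chunk_size": 512, "chunk_overlap": 51, "num_threads": 10},
    "filenames": ["./morpheus/data/randomly_generated_cybersecurity_text.txt"],
    "vdb_resource_name": "vdb_pdf",
    "watch": False,
}

validated = FileSourcePipeSchema(**config)
print(validated.chunk_size, validated.watch_interval)  # 512 -5.0 (schema defaults)
```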
+ +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from pydantic import BaseModel +from pydantic import Field +from pydantic import validator + +from typing import Optional +from typing_extensions import TypedDict + + +logger = logging.getLogger(__name__) + + +class StageSchema(BaseModel): + enable_monitor: bool = True + module_id: str = "kafka_scrape_source_pipe" + module_output_id: str = "output" + namespace: str = "morpheus_examples_llm" + run_indefinitely: bool = True + transform_type: str = "raw_chunker" + + +class KafkaSourceSchema(TypedDict, total=False): + async_commits: bool = True + auto_offset_reset: str = "earliest" + bootstrap_servers: str = "kafka:19092" + disable_commit: bool = False + disable_pre_filtering: bool = False + enable_monitor: bool = True + group_id: str = "morpheus" + input_topic: str = "raw_queue" + max_batch_size: int #= Field(default=2) + max_concurrent: int = 10 + poll_interval: str = "10millis" + stop_after: int = 0 + + +class WebScraperSchema(TypedDict, total=False): + chunk_overlap: int = 51 + chunk_size: int = 512 + enable_cache: bool = True + cache_path: str = "./.cache/llm/html/WebScrapeModule.sqlite" + cache_dir: str = "./.cache/llm/html" + link_column: str = "payload" + + +class RawChunkerScraperSchema(TypedDict, total=False): + payload_column: str = "payload" + chunk_size: int = 512 + chunk_overlap: int = 51 + + class Config: + extra = "forbid" + + +class DeserializeSchema(BaseModel): + output_batch_size: int = 2048 + + +class VDBResourceTaggingSchema(BaseModel): + vdb_resource_name: str = "vdb_kafka" + + +class KafkaSourcePipeSchema(BaseModel): + stage_config: StageSchema + kafka_config: KafkaSourceSchema + web_scraper_config: WebScraperSchema = None + raw_chunker_config: RawChunkerScraperSchema = None + deserialize_config: DeserializeSchema + vdb_config: VDBResourceTaggingSchema + + class Config: + extra = "forbid" diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/kafka_source_schema.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/kafka_source_schema.py new file mode 100644 index 000000000..1b797d735 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/kafka_source_schema.py @@ -0,0 +1,42 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from pydantic import BaseModel +from pydantic import Field +from pydantic import validator + +logger = logging.getLogger(__name__) + + +class KafkaSourceSchema(BaseModel): + + max_batch_size: int = Field(default=2) + bootstrap_servers: str = "kafka:19092" + input_topic: str = "raw_queue" + group_id: str = "morpheus" + poll_interval: str = "10millis" + disable_commit: bool = False + disable_pre_filtering: bool = False + auto_offset_reset: str = "earliest" + stop_after: int = 0 + async_commits: bool = True + + class Config: + extra = "forbid" diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/raw_chunker_schema.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/raw_chunker_schema.py new file mode 100644 index 000000000..5e55f6856 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/raw_chunker_schema.py @@ -0,0 +1,32 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +logging + +import logging + +from pydantic import BaseModel + +logger = logging.getLogger(__name__) + + +class RawChunkerSchema(BaseModel): + payload_column: str = "payload" + chunk_size: int = 512 + chunk_overlap: int = 51 + + class Config: + extra = "forbid" diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/rss_source_pipe_schema.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/rss_source_pipe_schema.py new file mode 100644 index 000000000..d8018f458 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/rss_source_pipe_schema.py @@ -0,0 +1,52 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
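`KafkaSourceSchema` above is a plain pydantic model, so an empty construction yields the documented defaults and `extra = "forbid"` rejects unknown keys. A minimal sketch, again assuming the `vdb_upload` package is importable:

```python
from vdb_upload.schemas.kafka_source_schema import KafkaSourceSchema

cfg = KafkaSourceSchema()                       # all defaults
print(cfg.bootstrap_servers, cfg.input_topic)   # kafka:19092 raw_queue

cfg = KafkaSourceSchema(max_batch_size=64)      # override a single field
print(cfg.max_batch_size)                       # 64
```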
+ +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from pydantic import BaseModel +from pydantic import Field +from pydantic import validator + +logger = logging.getLogger(__name__) + + +class RSSSourcePipeSchema(BaseModel): + batch_size: int = 32 + cache_dir: str = "./.cache/http" + cooldown_interval_sec: int = 600 + enable_cache: bool = False + enable_monitor: bool = True + feed_input: List[str] = Field(default_factory=list) + interval_sec: int = 600 + output_batch_size: int = 2048 + request_timeout_sec: float = 2.0 + run_indefinitely: bool = True + stop_after_sec: int = 0 + vdb_resource_name: str + web_scraper_config: Optional[Dict[Any, Any]] = None + + @validator('feed_input', pre=True) + def validate_feed_input(cls, v): + if isinstance(v, str): + return [v] + elif isinstance(v, list): + return v + raise ValueError('feed_input must be a string or a list of strings') + + class Config: + extra = "forbid" diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/schema_transform_schema.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/schema_transform_schema.py new file mode 100644 index 000000000..446c70c01 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/schemas/schema_transform_schema.py @@ -0,0 +1,39 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import Any +from typing import Dict +from typing import Optional + +from pydantic import BaseModel +from pydantic import Field + +logger = logging.getLogger(__name__) + + +class ColumnTransformSchema(BaseModel): + dtype: str + op_type: str + from_: Optional[str] = Field(None, alias="from") + + class Config: + extra = "forbid" + + +class SchemaTransformSchema(BaseModel): + schema_transform_config: Dict[str, Dict[str, Any]] = Field(default_factory=dict) + + class Config: + extra = "forbid" diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml new file mode 100644 index 000000000..340cbe3cc --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml @@ -0,0 +1,302 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
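One behaviour of `RSSSourcePipeSchema` worth noting is the `feed_input` pre-validator: a single URL string is coerced into a one-element list, so both forms below are accepted. A small sketch:

```python
from vdb_upload.schemas.rss_source_pipe_schema import RSSSourcePipeSchema

single = RSSSourcePipeSchema(vdb_resource_name="vdb_rss",
                             feed_input="https://www.theregister.com/security/headlines.atom")
many = RSSSourcePipeSchema(vdb_resource_name="vdb_rss",
                           feed_input=["https://krebsonsecurity.com/feed/",
                                       "https://securelist.com/feed"])

print(single.feed_input)     # ['https://www.theregister.com/security/headlines.atom']
print(len(many.feed_input))  # 2
```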
+# See the License for the specific language governing permissions and +# limitations under the License. + +vdb_pipeline: + embeddings: + isolate_embeddings: false + model_kwargs: + force_convert_inputs: true + model_name: "all-MiniLM-L6-v2" + server_url: "http://localhost:8001" + use_shared_memory: true + + pipeline: + edge_buffer_size: 128 + feature_length: 512 + max_batch_size: 256 + num_threads: 10 + pipeline_batch_size: 128 + + sources: + - type: rss + name: "rss_cve" + config: + batch_size: 128 # Number of rss feeds per batch + cache_dir: "./.cache/http" + cooldown_interval_sec: 600 + enable_cache: False + enable_monitor: True + feed_input: + - "https://www.theregister.com/security/headlines.atom" + - "https://isc.sans.edu/dailypodcast.xml" + - "https://threatpost.com/feed/" + - "http://feeds.feedburner.com/TheHackersNews?format=xml" + - "https://www.bleepingcomputer.com/feed/" + - "https://therecord.media/feed/" + - "https://blog.badsectorlabs.com/feeds/all.atom.xml" + - "https://krebsonsecurity.com/feed/" + - "https://www.darkreading.com/rss_simple.asp" + - "https://blog.malwarebytes.com/feed/" + - "https://msrc.microsoft.com/blog/feed" + - "https://securelist.com/feed" + - "https://www.crowdstrike.com/blog/feed/" + - "https://threatconnect.com/blog/rss/" + - "https://news.sophos.com/en-us/feed/" + - "https://www.us-cert.gov/ncas/current-activity.xml" + - "https://www.csoonline.com/feed" + - "https://www.cyberscoop.com/feed" + - "https://research.checkpoint.com/feed" + - "https://feeds.fortinet.com/fortinet/blog/threat-research" + - "https://www.mcafee.com/blogs/rss" + - "https://www.digitalshadows.com/blog-and-research/rss.xml" + - "https://www.nist.gov/news-events/cybersecurity/rss.xml" + - "https://www.sentinelone.com/blog/rss/" + - "https://www.bitdefender.com/blog/api/rss/labs/" + - "https://www.welivesecurity.com/feed/" + - "https://unit42.paloaltonetworks.com/feed/" + - "https://mandiant.com/resources/blog/rss.xml" + - "https://www.wired.com/feed/category/security/latest/rss" + - "https://www.wired.com/feed/tag/ai/latest/rss" + - "https://blog.google/threat-analysis-group/rss/" + - "https://intezer.com/feed/" + interval_sec: 600 + output_batch_size: 2048 # Number of chunked documents per output batch + request_timeout_sec: 2.0 + run_indefinitely: true + stop_after_sec: 0 + web_scraper_config: + chunk_overlap: 51 + chunk_size: 512 + enable_cache: false + vdb_resource_name: "vdb_rss" + + - type: filesystem + name: "filesystem_pdf_source" + config: + batch_size: 1024 + extractor_config: + chunk_size: 512 + num_threads: 10 + chunk_overlap: 51 + enable_monitor: True + filenames: + - "./morpheus/data/randomly_generated_cybersecurity_text.txt" # will need to supply + vdb_resource_name: "vdb_pdf" + watch: false + + - type: filesystem + name: "filesystem_csv_source" + config: + batch_size: 1024 + chunk_overlap: 51 + chunk_size: 512 + converters_meta: + csv: + chunk_overlap: 51 + chunk_size: 1024 + text_column_names: # For CSV files, the data from each text_column_name will be concatenated together. + - "raw" # Requires same schema for all CSV files. 
+ - "request_header_referer" + enable_monitor: True + filenames: + - "./models/datasets/training-data/log-parsing-training-data.csv" + vdb_resource_name: "vdb_csv" + watch: false + + - type: custom + name: "custom_source_text" + config: + batch_size: 1024 + enable_monitor: True + extractor_config: + chunk_size: 512 + chunk_overlap: 51 + config_name_mapping: "file_source_config" + filenames: + - "./morpheus/data/*.txt" + module_id: "file_source_pipe" + module_output_id: "output" + namespace: "morpheus_examples_llm" + vdb_resource_name: "VDBGENERAL" + watch: false + + tokenizer: + model_kwargs: + add_special_tokens: false + column: "content" + do_lower_case: true + truncation: true + vocab_hash_file: "data/bert-base-uncased-hash.txt" + model_name: "bert-base-uncased-hash" + + vdb: + batch_size: 5120 + resource_name: "VDBGENERAL" # Identifier for the resource in the vector database + embedding_size: 384 + recreate: True # Whether to recreate the resource if it already exists + service: "milvus" # Specify the type of vector database + uri: "http://localhost:19530" # URI for connecting to the Vector Database server + resource_schemas: + VDBGENERAL: + index_conf: + field_name: "embedding" + metric_type: "L2" + index_type: "HNSW" + params: + M: 8 + efConstruction: 64 + + schema_conf: + enable_dynamic_field: true + schema_fields: + - name: id + dtype: INT64 + description: Primary key for the collection + is_primary: true + auto_id: true + - name: title + dtype: VARCHAR + description: Title or heading of the data entry + max_length: 65_535 + - name: source + dtype: VARCHAR + description: Source or origin of the data entry + max_length: 65_535 + - name: summary + dtype: VARCHAR + description: Brief summary or abstract of the data content + max_length: 65_535 + - name: content + dtype: VARCHAR + description: Main content or body of the data entry + max_length: 65_535 + - name: embedding + dtype: FLOAT_VECTOR + description: Embedding vectors representing the data entry + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources + vdb_pdf: + index_conf: + field_name: embedding + metric_type: L2 + index_type: HNSW + params: + M: 8 + efConstruction: 64 + + schema_conf: + enable_dynamic_field: true + schema_fields: + - name: id + dtype: INT64 + description: Primary key for the collection + is_primary: true + auto_id: true + - name: title + dtype: VARCHAR + description: Title or heading of the data entry + max_length: 65_535 + - name: source + dtype: VARCHAR + description: Source or origin of the data entry + max_length: 65_535 + - name: summary + dtype: VARCHAR + description: Brief summary or abstract of the data content + max_length: 65_535 + - name: content + dtype: VARCHAR + description: Main content or body of the data entry + max_length: 65_535 + - name: embedding + dtype: FLOAT_VECTOR + description: Embedding vectors representing the data entry + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources + vdb_csv: + index_conf: + field_name: embedding + metric_type: L2 + index_type: HNSW + params: + M: 8 + efConstruction: 64 + + schema_conf: + enable_dynamic_field: true + schema_fields: + - name: id + dtype: INT64 + description: Primary key for the collection + is_primary: true + auto_id: true + - name: title + dtype: VARCHAR + description: Title or heading of the data entry + max_length: 65_535 + - name: source + dtype: VARCHAR + description: Source or origin of the 
data entry + max_length: 65_535 + - name: summary + dtype: VARCHAR + description: Brief summary or abstract of the data content + max_length: 65_535 + - name: content + dtype: VARCHAR + description: Main content or body of the data entry + max_length: 65_535 + - name: embedding + dtype: FLOAT_VECTOR + description: Embedding vectors representing the data entry + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources + vdb_rss: + index_conf: + field_name: embedding + metric_type: L2 + index_type: HNSW + params: + M: 8 + efConstruction: 64 + + schema_conf: + enable_dynamic_field: true + schema_fields: + - name: id + dtype: INT64 + description: Primary key for the collection + is_primary: true + auto_id: true + - name: title + dtype: VARCHAR + description: Title or heading of the data entry + max_length: 65_535 + - name: source + dtype: VARCHAR + description: Source or origin of the data entry + max_length: 65_535 + - name: summary + dtype: VARCHAR + description: Brief summary or abstract of the data content + max_length: 65_535 + - name: content + dtype: VARCHAR + description: Main content or body of the data entry + max_length: 65_535 + - name: embedding + dtype: FLOAT_VECTOR + description: Embedding vectors representing the data entry + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources \ No newline at end of file diff --git a/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_utils.py b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_utils.py new file mode 100644 index 000000000..c3babf526 --- /dev/null +++ b/experimental/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_utils.py @@ -0,0 +1,565 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import typing + +import pymilvus +import yaml + +from morpheus.config import Config +from morpheus.config import PipelineModes +from morpheus.service.vdb.milvus_client import DATA_TYPE_MAP + +logger = logging.getLogger(__name__) + + +def build_milvus_config(resource_schema_config: dict): + schema_fields = [] + for field_data in resource_schema_config["schema_conf"]["schema_fields"]: + field_data["dtype"] = DATA_TYPE_MAP.get(field_data["dtype"]) + field_schema = pymilvus.FieldSchema(**field_data) + schema_fields.append(field_schema.to_dict()) + + resource_schema_config["schema_conf"]["schema_fields"] = schema_fields + + return resource_schema_config + + +def is_valid_service(ctx, param, value): # pylint: disable=unused-argument + """ + Validate the provided vector database service name. + + Checks if the given vector database service name is supported and valid. This is used as a callback function + for a CLI option to ensure that the user inputs a supported service name. 
+ + Parameters + ---------- + ctx : click.Context + The context within which the command is being invoked. + param : click.Parameter + The parameter object that this function serves as a callback for. + value : str + The value of the parameter to validate. + + Returns + ------- + str + The validated and lowercased service name. + + Raises + ------ + click.BadParameter + If the provided service name is not supported or invalid. + """ + from morpheus.service.vdb.utils import validate_service + value = value.lower() + return validate_service(service_name=value) + + +def merge_dicts(d1, d2): + """ + Recursively merge two dictionaries. + + Nested dictionaries are merged instead of being replaced. + Non-dict items in the second dictionary will override those in the first. + + Parameters + ---------- + d1 : dict + The first dictionary. + d2 : dict + The second dictionary, whose items will take precedence. + + Returns + ------- + dict + The merged dictionary. + """ + for key, value in d2.items(): + if key in d1 and isinstance(d1[key], dict) and isinstance(value, dict): + merge_dicts(d1[key], value) + else: + d1[key] = value + return d1 + + +def merge_configs(file_config, cli_config): + """ + Merge two configuration dictionaries, prioritizing the file_config configuration. + + This function merges configurations provided from a file and the CLI, with the CLI configuration taking precedence + in case of overlapping keys. Nested dictionaries are merged recursively. + + Parameters + ---------- + file_config : dict + The configuration dictionary loaded from a file. + cli_config : dict + The configuration dictionary provided via CLI arguments. + + Returns + ------- + dict + A merged dictionary with CLI configurations overriding file configurations where they overlap. 
+ """ + return merge_dicts(cli_config.copy(), {k: v for k, v in file_config.items() if v is not None}) + + +def _build_default_rss_source(enable_cache, + enable_monitors, + interval_secs, + run_indefinitely, + stop_after, + vector_db_resource_name, + content_chunking_size, + rss_request_timeout_sec, + feed_inputs): + return { + 'type': 'rss', + 'name': 'rss-cli', + 'config': { + # RSS feeds can take a while to pull, smaller batch sizes allows the pipeline to feel more responsive + "batch_size": 32, + "output_batch_size": 2048, + "cache_dir": "./.cache/http", + "cooldown_interval_sec": interval_secs, + "enable_cache": enable_cache, + "enable_monitor": enable_monitors, + "feed_input": feed_inputs if feed_inputs else build_rss_urls(), + "interval_sec": interval_secs, + "request_timeout_sec": rss_request_timeout_sec, + "run_indefinitely": run_indefinitely, + "vdb_resource_name": vector_db_resource_name, + "web_scraper_config": { + "chunk_size": content_chunking_size, + "enable_cache": enable_cache, + } + } + } + + +def _build_default_filesystem_source(enable_monitors, + file_source, + pipeline_batch_size, + run_indefinitely, + vector_db_resource_name, + content_chunking_size, + num_threads): + return { + 'type': 'filesystem', + 'name': 'filesystem-cli', + 'config': { + "batch_size": pipeline_batch_size, + "enable_monitor": enable_monitors, + "extractor_config": { + "chunk_size": content_chunking_size, + "num_threads": num_threads, + }, + "filenames": file_source, + "vdb_resource_name": vector_db_resource_name, + "watch": run_indefinitely, + } + } + + +def build_cli_configs(source_type, + enable_cache, + embedding_size, + isolate_embeddings, + embedding_model_name, + enable_monitors, + file_source, + interval_secs, + pipeline_batch_size, + run_indefinitely, + stop_after, + vector_db_resource_name, + vector_db_service, + vector_db_uri, + content_chunking_size, + num_threads, + rss_request_timeout_sec, + model_max_batch_size, + model_fea_length, + triton_server_url, + feed_inputs): + """ + Create configuration dictionaries based on CLI arguments. + + Constructs individual configuration dictionaries for various components of the data processing pipeline, + such as source, embeddings, pipeline, tokenizer, and vector database configurations. + + Parameters + ---------- + source_type : list of str + Types of data sources (e.g., 'rss', 'filesystem'). + enable_cache : bool + Flag to enable caching. + embedding_size : int + Size of the embeddings. + isolate_embeddings : bool + Flag to isolate embeddings. + embedding_model_name : str + Name of the embedding model. + enable_monitors : bool + Flag to enable monitor functionality. + file_source : list of str + File sources or paths to be processed. + interval_secs : int + Interval in seconds for operations. + pipeline_batch_size : int + Batch size for the pipeline. + run_indefinitely : bool + Flag to run the process indefinitely. + stop_after : int + Stop after a certain number of records. + vector_db_resource_name : str + Name of the resource in the vector database. + vector_db_service : str + Name of the vector database service. + vector_db_uri : str + URI for the vector database server. + content_chunking_size : int + Size of content chunks. + num_threads : int + Number of threads to use. + rss_request_timeout_sec : float + Timeout in seconds for RSS requests. + model_max_batch_size : int + Maximum batch size for the model. + model_fea_length : int + Feature length for the model. + triton_server_url : str + URL of the Triton server. 
+ feed_inputs : list of str + RSS feed inputs. + + Returns + ------- + tuple + A tuple containing five dictionaries for source, embeddings, pipeline, tokenizer, and vector database configurations. + """ + + # Source Configuration + cli_source_conf = {} + if 'rss' in source_type: + cli_source_conf['rss'] = _build_default_rss_source(enable_cache, + enable_monitors, + interval_secs, + run_indefinitely, + stop_after, + vector_db_resource_name, + content_chunking_size, + rss_request_timeout_sec, + feed_inputs) + if 'filesystem' in source_type: + cli_source_conf['filesystem'] = _build_default_filesystem_source(enable_monitors, + file_source, + pipeline_batch_size, + run_indefinitely, + vector_db_resource_name, + content_chunking_size, + num_threads) + + # Embeddings Configuration + cli_embeddings_conf = { + "feature_length": model_fea_length, + "max_batch_size": model_max_batch_size, + "model_kwargs": { + "force_convert_inputs": True, + "model_name": embedding_model_name, + "server_url": triton_server_url, + "use_shared_memory": False, + }, + "num_threads": num_threads, + } + + # Pipeline Configuration + cli_pipeline_conf = { + "edge_buffer_size": 128, + "embedding_size": embedding_size, + "feature_length": model_fea_length, + "isolate_embeddings": isolate_embeddings, + "max_batch_size": 256, + "num_threads": num_threads, + "pipeline_batch_size": pipeline_batch_size, + } + + # Tokenizer Configuration + cli_tokenizer_conf = { + "model_name": "bert-base-uncased-hash", + "model_kwargs": { + "add_special_tokens": False, + "column": "content", + "do_lower_case": True, + "truncation": True, + "vocab_hash_file": "data/bert-base-uncased-hash.txt", + } + } + + # VDB Configuration + cli_vdb_conf = { + # Vector db upload has some significant transaction overhead, batch size here should be as large as possible + 'batch_size': 5120, + 'resource_name': vector_db_resource_name, + 'embedding_size': embedding_size, + 'recreate': False, + 'resource_schemas': { + vector_db_resource_name: + build_defualt_milvus_config(embedding_size) if (vector_db_service == 'milvus') else None, + }, + 'service': vector_db_service, + 'uri': vector_db_uri, + } + + return cli_source_conf, cli_embeddings_conf, cli_pipeline_conf, cli_tokenizer_conf, cli_vdb_conf + + +def build_pipeline_config(pipeline_config: dict): + """ + Construct a pipeline configuration object from a dictionary. + + Parameters + ---------- + pipeline_config : dict + A dictionary containing pipeline configuration parameters. + + Returns + ------- + Config + A pipeline configuration object populated with values from the input dictionary. + + Notes + ----- + This function is responsible for mapping a dictionary of configuration parameters + into a structured configuration object used by the pipeline. + """ + + config = Config() + config.mode = PipelineModes.NLP + + embedding_size = pipeline_config.get('embedding_size') + + config.num_threads = pipeline_config.get('num_threads') + config.pipeline_batch_size = pipeline_config.get('pipeline_batch_size') + config.model_max_batch_size = pipeline_config.get('max_batch_size') + config.feature_length = pipeline_config.get('feature_length') + config.edge_buffer_size = pipeline_config.get('edge_buffer_size') + config.class_labels = [str(i) for i in range(embedding_size)] + + return config + + +def build_final_config(vdb_conf_path, + cli_source_conf, + cli_embeddings_conf, + cli_pipeline_conf, + cli_tokenizer_conf, + cli_vdb_conf): + """ + Load and merge configurations from the CLI and YAML file. 
+ + This function combines the configurations provided via the CLI with those specified in a YAML file. + If a YAML configuration file is specified and exists, it will merge its settings with the CLI settings, + with the YAML settings taking precedence. + + Parameters + ---------- + vdb_conf_path : str + Path to the YAML configuration file. + cli_source_conf : dict + Source configuration provided via CLI. + cli_embeddings_conf : dict + Embeddings configuration provided via CLI. + cli_pipeline_conf : dict + Pipeline configuration provided via CLI. + cli_tokenizer_conf : dict + Tokenizer configuration provided via CLI. + cli_vdb_conf : dict + Vector Database (VDB) configuration provided via CLI. + + Returns + ------- + dict + A dictionary containing the final merged configuration for the pipeline, including source, embeddings, + tokenizer, and VDB configurations. + + Notes + ----- + The function prioritizes the YAML file configurations over CLI configurations. In case of overlapping + settings, the values from the YAML file will overwrite those from the CLI. + """ + final_config = {} + + # Load and merge configurations from the YAML file if it exists + if vdb_conf_path: + with open(vdb_conf_path, 'r') as file: + vdb_pipeline_config = yaml.safe_load(file).get('vdb_pipeline', {}) + + embeddings_conf = merge_configs(vdb_pipeline_config.get('embeddings', {}), cli_embeddings_conf) + pipeline_conf = merge_configs(vdb_pipeline_config.get('pipeline', {}), cli_pipeline_conf) + source_conf = vdb_pipeline_config.get('sources', []) + list(cli_source_conf.values()) + tokenizer_conf = merge_configs(vdb_pipeline_config.get('tokenizer', {}), cli_tokenizer_conf) + vdb_conf = vdb_pipeline_config.get('vdb', {}) + resource_schema = vdb_conf.pop("resource_schema", None) + + if resource_schema: + vdb_conf["resource_kwargs"] = build_milvus_config(resource_schema) + vdb_conf = merge_configs(vdb_conf, cli_vdb_conf) + + pipeline_conf['embedding_size'] = vdb_conf.get('embedding_size', 384) + + final_config.update({ + 'embeddings_config': embeddings_conf, + 'pipeline_config': build_pipeline_config(pipeline_conf), + 'source_config': source_conf, + 'tokenizer_config': tokenizer_conf, + 'vdb_config': vdb_conf, + }) + else: + # Use CLI configurations only + final_config.update({ + 'embeddings_config': cli_embeddings_conf, + 'pipeline_config': build_pipeline_config(cli_pipeline_conf), + 'source_config': list(cli_source_conf.values()), + 'tokenizer_config': cli_tokenizer_conf, + 'vdb_config': cli_vdb_conf, + }) + + # If no sources are specified either via CLI or in the yaml config, add a default RSS source + if (not final_config['source_config']): + final_config['source_config'].append( + _build_default_rss_source(enable_cache=True, + enable_monitors=True, + interval_secs=60, + run_indefinitely=True, + stop_after=None, + vector_db_resource_name="VDBUploadExample", + content_chunking_size=128, + rss_request_timeout_sec=30, + feed_inputs=build_rss_urls())) + + return final_config + + +def build_defualt_milvus_config(embedding_size: int) -> typing.Dict[str, typing.Any]: + """ + Builds the configuration for Milvus. + + This function creates a dictionary configuration for a Milvus collection. + It includes the index configuration and the schema configuration, with + various fields like id, title, link, summary, page_content, and embedding. + + Parameters + ---------- + embedding_size : int + The size of the embedding vector. 
+ + Returns + ------- + typing.Dict[str, Any] + A dictionary containing the configuration settings for Milvus. + """ + + milvus_resource_kwargs = { + "index_conf": { + "field_name": "embedding", + "metric_type": "L2", + "index_type": "HNSW", + "params": { + "M": 8, + "efConstruction": 64, + }, + }, + "schema_conf": { + "enable_dynamic_field": True, + "schema_fields": [ + pymilvus.FieldSchema(name="id", + dtype=pymilvus.DataType.INT64, + description="Primary key for the collection", + is_primary=True, + auto_id=True).to_dict(), + pymilvus.FieldSchema(name="title", + dtype=pymilvus.DataType.VARCHAR, + description="The title of the RSS Page", + max_length=65_535).to_dict(), + pymilvus.FieldSchema(name="source", + dtype=pymilvus.DataType.VARCHAR, + description="The URL of the RSS Page", + max_length=65_535).to_dict(), + pymilvus.FieldSchema(name="summary", + dtype=pymilvus.DataType.VARCHAR, + description="The summary of the RSS Page", + max_length=65_535).to_dict(), + pymilvus.FieldSchema(name="content", + dtype=pymilvus.DataType.VARCHAR, + description="A chunk of text from the RSS Page", + max_length=65_535).to_dict(), + pymilvus.FieldSchema(name="embedding", + dtype=pymilvus.DataType.FLOAT_VECTOR, + description="Embedding vectors", + dim=embedding_size).to_dict(), + ], + "description": "Test collection schema" + } + } + + return milvus_resource_kwargs + + +def build_rss_urls() -> typing.List[str]: + """ + Builds a list of RSS feed URLs. + + Returns + ------- + typing.List[str] + A list of URLs as strings, each pointing to a different RSS feed. + """ + + return [ + "https://www.theregister.com/security/headlines.atom", + "https://isc.sans.edu/dailypodcast.xml", + "https://threatpost.com/feed/", + "http://feeds.feedburner.com/TheHackersNews?format=xml", + "https://www.bleepingcomputer.com/feed/", + "https://therecord.media/feed/", + "https://blog.badsectorlabs.com/feeds/all.atom.xml", + "https://krebsonsecurity.com/feed/", + "https://www.darkreading.com/rss_simple.asp", + "https://blog.malwarebytes.com/feed/", + "https://msrc.microsoft.com/blog/feed", + "https://securelist.com/feed", + "https://www.crowdstrike.com/blog/feed/", + "https://threatconnect.com/blog/rss/", + "https://news.sophos.com/en-us/feed/", + "https://www.us-cert.gov/ncas/current-activity.xml", + "https://www.csoonline.com/feed", + "https://www.cyberscoop.com/feed", + "https://research.checkpoint.com/feed", + "https://feeds.fortinet.com/fortinet/blog/threat-research", + "https://www.mcafee.com/blogs/rss", + "https://www.digitalshadows.com/blog-and-research/rss.xml", + "https://www.nist.gov/news-events/cybersecurity/rss.xml", + "https://www.sentinelone.com/blog/rss/", + "https://www.bitdefender.com/blog/api/rss/labs/", + "https://www.welivesecurity.com/feed/", + "https://unit42.paloaltonetworks.com/feed/", + "https://mandiant.com/resources/blog/rss.xml", + "https://www.wired.com/feed/category/security/latest/rss", + "https://www.wired.com/feed/tag/ai/latest/rss", + "https://blog.google/threat-analysis-group/rss/", + "https://intezer.com/feed/", + ] diff --git a/experimental/streaming_ingest_rag/producer/src/README.md b/experimental/streaming_ingest_rag/producer/src/README.md new file mode 100644 index 000000000..c48933951 --- /dev/null +++ b/experimental/streaming_ingest_rag/producer/src/README.md @@ -0,0 +1,67 @@ +# Overview + +Provides a sample data producer and consumer for the Streaming Embeddings RAG Workflow. 
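+
+## Work Queue Message Format
+
+Each line of the work queue file is a standalone JSON object; the producer publishes these records to the configured Kafka topic, and the consumer simply JSON-decodes and prints each message it reads back. The `metadata.callable` field appears to name the downstream handler that should process the payload (the bundled sample uses `web_scraper`). For example, using `head` to show the first record of the bundled sample file:
+
+```bash
+head -1 data/url_sample.jsonl
+{"payload": "https://nvidianews.nvidia.com/bios/chris-a-malachowsky", "metadata": {"callable": "web_scraper"}}
+```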
+
+## Producer Usage
+
+Output from help utility:
+
+```bash
+root@50c78a63ca48:/workspace/src# python3 producer.py --help
+usage: producer.py [-h] [-f FILEPATH] [-b BOOTSTRAP_SERVERS] [-t TOPIC] [-n N_MESSAGES] [-i INTERVAL] [-l] [-c CLIENT_ID] [-d]
+
+options:
+  -h, --help            show this help message and exit
+  -f FILEPATH, --filepath FILEPATH
+                        Path to work queue jsonl file. (Default value: data/test.jsonl)
+  -b BOOTSTRAP_SERVERS, --bootstrap-servers BOOTSTRAP_SERVERS
+                        Kafka broker host:port. (Default value: kafka:19092)
+  -t TOPIC, --topic TOPIC
+                        Kafka topic used to publish work. (Default value: work_queue)
+  -n N_MESSAGES, --n-messages N_MESSAGES
+                        Total messages to produce. (Default value: 1000)
+  -i INTERVAL, --interval INTERVAL
+                        Interval to publish messages. (Default value: 1.0)
+  -l, --loop            Flag to continuously produce messages. (Default value: False)
+  -c CLIENT_ID, --client-id CLIENT_ID
+                        Client ID for the producer. (Default value: publisher)
+  -d, --delete-topic    Flag to delete topic after producing completes. (Default value: False)
+```
+
+Example usage below:
+
+```bash
+python3 producer.py --filepath data/url_sample.jsonl \
+    --bootstrap-servers kafka:19092 --topic scrape_queue \
+    --n-messages 1000 --interval 0.1
+```
+
+## Consumer Usage
+
+Output from help utility:
+
+```bash
+root@1e78121a14e0:/workspace/src# python3 consumer.py --help
+usage: consumer.py [-h] [-f GROUP_ID] [-b BOOTSTRAP_SERVERS] [-t TOPIC] [-m MAX_MESSAGES] [-a AUTO_OFFSET_RESET]
+
+options:
+  -h, --help            show this help message and exit
+  -f GROUP_ID, --group-id GROUP_ID
+                        Specifies consumer groups subscriber will belong to. (Default value: morpheus)
+  -b BOOTSTRAP_SERVERS, --bootstrap-servers BOOTSTRAP_SERVERS
+                        Kafka broker host:port. (Default value: kafka:19092)
+  -t TOPIC, --topic TOPIC
+                        Kafka topic consumer will subscribe to. (Default value: work_queue)
+  -m MAX_MESSAGES, --max-messages MAX_MESSAGES
+                        Maximum messages to read from kafka topic. (Default value: 10)
+  -a AUTO_OFFSET_RESET, --auto-offset-reset AUTO_OFFSET_RESET
+                        Specify auto.offset.reset parameter driving when to consume messages in a topic. (Default value: smallest)
+```
+
+Example usage below:
+
+```bash
+python3 consumer.py --group-id morpheus \
+    --bootstrap-servers kafka:19092 --topic scrape_queue \
+    --max-messages 10 --auto-offset-reset smallest
+```
diff --git a/experimental/streaming_ingest_rag/producer/src/consumer.py b/experimental/streaming_ingest_rag/producer/src/consumer.py
new file mode 100644
index 000000000..5b82498b7
--- /dev/null
+++ b/experimental/streaming_ingest_rag/producer/src/consumer.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
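+
+"""
+Sample Kafka consumer for the streaming ingest work queue.
+
+Subscribes to a topic (default ``work_queue``), JSON-decodes each message and
+prints it to stdout, committing offsets asynchronously every
+``min_commit_count`` messages.
+"""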
+
+import json
+import argparse
+import sys
+
+from abc import ABC, abstractmethod
+
+from confluent_kafka.admin import AdminClient
+from confluent_kafka import Consumer, KafkaError, KafkaException
+
+
+class Subscriber(ABC):
+
+    def __init__(
+            self,
+            bootstrap_servers='kafka:19092',
+            group_id='morpheus',
+            auto_offset_reset='smallest',
+            min_commit_count=5):
+
+        self._conf = {
+            'bootstrap.servers': bootstrap_servers,
+            'group.id': group_id,
+            'auto.offset.reset': auto_offset_reset}
+
+        self._consumer = Consumer(self._conf)
+        self._min_commit_count = min_commit_count
+
+    def subscribe(self, topic, max_messages=0):
+        """Consume messages from `topic`; a `max_messages` of 0 reads until interrupted."""
+
+        try:
+            self._consumer.subscribe([topic])
+
+            msg_count = 0
+            while True:
+                msg = self._consumer.poll(timeout=1.0)
+                if msg is None:
+                    continue
+
+                if msg.error():
+                    if msg.error().code() == KafkaError._PARTITION_EOF:
+                        # End of partition event
+                        sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
+                                         (msg.topic(), msg.partition(), msg.offset()))
+                    else:
+                        raise KafkaException(msg.error())
+                else:
+                    self._msg_process(msg)
+                    msg_count += 1
+                    if msg_count % self._min_commit_count == 0:
+                        self._consumer.commit(asynchronous=True)
+
+                    if (max_messages > 0) and (msg_count >= max_messages):
+                        break
+
+        finally:
+            # Close down consumer to commit final offsets.
+            self._consumer.close()
+
+    def _msg_process(self, message):
+        print(json.loads(message.value()))
+
+
+def main(args):
+
+    # Initialize the consumer from CLI arguments.
+    subscriber = Subscriber(
+        bootstrap_servers=args.bootstrap_servers,
+        group_id=args.group_id,
+        auto_offset_reset=args.auto_offset_reset)
+
+    # Start consuming from the requested topic.
+    subscriber.subscribe(
+        topic=args.topic,
+        max_messages=args.max_messages)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-f", "--group-id", type=str,
+                        required=False, default="morpheus",
+                        help="Specifies consumer groups subscriber will belong to. (Default value: morpheus)")
+
+    parser.add_argument("-b", "--bootstrap-servers", type=str,
+                        required=False, default='kafka:19092',
+                        help="Kafka broker host:port. (Default value: kafka:19092)")
+
+    parser.add_argument("-t", "--topic", type=str,
+                        required=False, default='work_queue',
+                        help="Kafka topic consumer will subscribe to. (Default value: work_queue)")
+
+    parser.add_argument("-m", "--max-messages", type=int,
+                        required=False, default=0,
+                        help="Maximum messages to read from kafka topic. (Default value: 10)")
+
+    parser.add_argument("-a", "--auto-offset-reset", type=str,
+                        required=False, default='smallest',
+                        help="Specify auto.offset.reset parameter driving when to consume messages in a topic. 
(Default value: smallest)") + + args = parser.parse_args() + + main(args) + diff --git a/experimental/streaming_ingest_rag/producer/src/data/url_sample.jsonl b/experimental/streaming_ingest_rag/producer/src/data/url_sample.jsonl new file mode 100644 index 000000000..50cd0b153 --- /dev/null +++ b/experimental/streaming_ingest_rag/producer/src/data/url_sample.jsonl @@ -0,0 +1,730 @@ +{"payload": "https://nvidianews.nvidia.com/bios/chris-a-malachowsky", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/bios/colette-kress", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/bios/debora-shoquist", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/bios/jay-puri", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/bios/jensen-huang", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/bios/tim-teter", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/contacts", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/cookie-notice", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/10-years-in-the-making-nvidia-brings-real-time-ray-tracing-to-gamers-with-geforce-rtx", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/200-gigabit-hdr-infiniband-boosts-microsoft-azure-high-performance-computing-cloud-instances", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/200-gigabit-hdr-infiniband-to-accelerate-eni-supercomputing-platform-to-become-world-s-most-powerful-industrial-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/2009-hpcwire-readers-and-editors-choice-awards-recognize-rise-of-nvidia-tesla-gpu-computing-solutions-6623030", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/200g-hdr-infiniband-accelerates-31-of-new-infiniband-systems-on-november-s-top500-including-fastest-2019-built-top500-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/200g-hdr-infiniband-to-accelerate-the-new-european-centre-for-medium-range-weather-forecasts-ecmwf-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/a-hero-for-every-gamer-nvidia-introduces-new-geforce-gtx-super-series", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/a-quantum-leap-for-every-gamer-nvidia-unveils-the-geforce-gtx-1060", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/a-quantum-leap-for-every-gamer:-nvidia-unveils-the-geforce-gtx-1060", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/a-quantum-leap-for-notebooks:-geforce-gtx-10-series-gpus-come-to-fastest-growing-gaming-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/a-quantum-leap-in-gaming:-nvidia-introduces-geforce-gtx-1080", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/accelerator-use-surges-in-world-s-top-supercomputers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/ace-avatar-cloud-engine-microservices", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/adobe-and-nvidia-announce-partnership-to-deliver-new-ai-services-for-creativity-and-digital-experiences", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/adobe-and-nvidia-partner-to-unlock-the-power-of-generative-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/ai-enterprise-ready-servers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/amdocs-and-nvidia-to-accelerate-adoption-of-generative-ai-for-1-7-trillion-telecom-industry", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/ampere-nvidia-extend-aican-gaming-platform-ecosystem", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/anita-powers-named-vice-president-occidental-petroleum-executive-vice-president-oxy-oil-and-gas", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/aousd-to-drive-open-standards-for-3d-content", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/at-t-supercharges-operations-with-nvidia-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/atos-and-nvidia-to-advance-climate-and-healthcare-research-with-exascale-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/aws-and-nvidia-collaborate-on-next-generation-infrastructure-for-training-large-machine-learning-models-and-building-generative-ai-applications", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/aws-nvidia-strategic-collaboration-for-generative-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/baidu-advances-ai-in-the-cloud-with-latest-nvidia-pascal-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/beijing-meteorological-service-selects-200-gigabit-hdr-infiniband-to-accelerate-new-supercomputing-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/bmw-group-selects-nvidia-to-redefine-factory-logistics", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/bosch-announces-ai-self-driving-computer-with-nvidia", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/byd-worlds-largest-ev-maker-partners-with-nvidia-for-mainstream-software-defined-vehicles-built-on-nvidia-drive", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/call-of-duty-modern-warfare-to-support-directx-raytracing-on-pc-powered-by-nvidia-geforce-rtx", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/cd-projekt-red-and-nvidia-partner-to-bring-ray-tracing-to-cyberpunk-2077", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/check-point-software-technologies-propels-mellanox-past-one-million-ethernet-switch-ports", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/china-s-top-cloud-providers-adopt-nvidia-volta-gpus-to-supercharge-next-gen-ai-services", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/china-s-top-server-builders-adopt-nvidia-ai-design-for-cloud-computing", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/cineca-to-build-worlds-fastest-ai-supercomputer-with-nvidia-and-atos", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/continental-and-nvidia-partner-to-enable-worldwide-production-of-ai-self-driving-cars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/csc-the-finnish-it-center-for-science-and-the-finnish-meteorological-institute-select-200-gigabit-hdr-infiniband-to-accelerate-multi-phase-supercomputer-program", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/cyberlink-s-photo-and-video-applications-deliver-improved-performance-and-new-features-with-nvidia-geforce-and-ion-gpus-6623033", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/dell-technologies-and-nvidia-introduce-project-helix-for-secure-on-premises-generative-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/design-and-performance-perfected-nvidia-introduces-max-q-for-gaming-laptops", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/deutsche-post-dhl-group-selects-nvidia-drive-px-for-autonomous-delivery-truck-fleet", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/didi-chuxing-teams-with-nvidia-for-autonomous-driving-and-cloud-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/dropbox-and-nvidia-team-to-bring-personalized-generative-ai-to-millions-of-customers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/durham-university-and-diracs-new-nvidia-infiniband-powered-supercomputer-to-accelerate-our-understanding-of-the-universe", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/edward-a-sandy-lowe-named-president-oxy-oil-and-gas-international-production", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/energy-giant-hess-explores-benefits-of-virtualization-nvidia-sli-multi-os-technology-on-hp-workstations-6623039", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/ericsson-and-nvidia-collaborate-to-accelerate-virtualized-5g-radio-access-networks-with-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/fanuc-to-build-factory-of-the-future-using-nvidia-ai-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/former-u-s-commerce-secretary-carlos-m-gutierrez-elected-to-occidental-petroleum-board-of-directors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/fortnite-is-rtx-on-real-time-ray-tracing-comes-to-one-of-most-popular-games-on-the-planet", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/foxconn-automated-electric-vehicles-nvidia-drive", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/fujitsu-selects-infiniband-to-accelerate-their-new-arm-based-primehpc-fx700-supercomputer-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/future-of-streaming-entertainment-starts-today-with-launch-of-nvidia-shield-tv-starting-at-149", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/ge-and-nvidia-join-forces-to-accelerate-artificial-intelligence-adoption-in-healthcare", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/geforce-gtx-550-ti-and-3d-vision-redefine-pc-gaming-at-149-each-6670240", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/geforce-now-streams-aaa-games-to-cars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/geforce-rtx-40-super-series", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/geforce-rtx-4060-family-is-here-nvidias-revolutionary-ada-lovelace-architecture-comes-to-core-gamers-everywhere-starting-at-299", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/generative-ai-rtx-pcs-and-workstations", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/gh200-grace-hopper-superchip-with-hbm3e-memory", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/global-availability-of-nvidia-ai-enterprise-makes-ai-accessible-for-every-industry", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/global-computer-companies-announce-nvidia-powered-enterprise-servers-optimized-for-data-science", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/global-pc-builders-rally-around-nvidia-to-deliver-vr-ready-pcs-and-add-in-cards", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/global-technology-leaders-adopt-nvidia-egx-edge-ai-platform-to-infuse-intelligence-at-the-edge-of-every-business", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/google-cloud-and-nvidia-expand-partnership-to-advance-ai-computing-software-and-services", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/gpu-powered-amber-11-unleashes-desktop-supercomputing-for-bio-scientists-6623007", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/gpu-supercomputing-now-available-on-demand-from-amazon-web-services-6622974", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/hdr-200g-infiniband-sets-new-performance-records-accelerating-multiple-high-performance-computing-and-artificial-intelligence-platforms-around-the-world", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/hdr-infiniband-selected-by-meteo-france-to-accelerate-two-new-large-scale-supercomputers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/here-nvidia-partner-on-ai-technology-for-hd-mapping-from-cloud-to-car", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/high-performance-computing-luminaries-to-speak-at-nvidia-booth-at-sc09-conference-6623032", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/hyundai-motor-group-selects-nvidia-drive-infotainment-and-ai-platform-for-all-future-hyundai-kia-and-genesis-models", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/ibm-and-nvidia-collaborate-to-expand-open-source-machine-learning-tools-for-data-scientists", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/industrial-light-magic-s-rango-rides-into-the-wild-west-with-help-from-nvidia-quadro-6670242", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/industry-leaders-investors-line-up-to-see-the-hottest-new-technologies-at-next-month-s-emerging-companies-summit-6622989", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/infiniband-accelerates-six-of-the-top-ten-supercomputers-in-the-world-including-the-top-three-and-four-of-the-top-five-on-june-s-top500", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/infosys-and-nvidia", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/international-trade-commission-initial-determination-finds-rambus-patents-invalid", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/international-trade-commission-initial-determination-finds-rambus-patents-invalid-6623020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/it-s-here-nvidia-quadro-delivers-nvidia-fermi-architecture-to-the-mac-pro-6622972", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/it4innovations-national-supercomputing-center-a-eurohpc-supercomputing-site-selected-mellanox-200g-hdr-infiniband-to-accelerate-their-new-research-infrastructure", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/its-time-for-ti-nvidia-introduces-gaming-flagship-the-geforce-rtx-3080-ti", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/jaguar-land-rover-announces-partnership-with-nvidia", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/japans-komatsu-selects-nvidia-as-partner-for-deploying-ai-to-create-safer-more-efficient-construction-sites", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/journey-to-the-red-planet-in-mars-2030-out-of-this-world-vr-experience-debuts-at-gpu-technology-conference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/kings-college-london-and-nvidia-build-uks-first-ai-platform-for-nhs-hospitals", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/kroger-and-nvidia-to-reinvent-the-shopping-experience-through-state-of-the-art-ai-enabled-applications-and-services", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/leading-japanese-companies-select-nvidia-jetson-agx-xavier-for-next-generation-autonomous-machines", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/lenovo-nvidia-hybrid-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/lockheed-martin-nvidia-digital-twin-for-noaa", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/medtronic-and-nvidia-collaborate-to-build-ai-platform-for-medical-devices", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-200-gigabit-hdr-infiniband-advances-innovations-in-high-performance-computing-and-artificial-intelligence", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/mellanox-200-gigabit-hdr-infiniband-to-accelerate-a-world-leading-supercomputer-at-the-high-performance-computing-center-of-the-university-of-stuttgart-hlrs", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-capital-invests-in-storage-leaders-excelero-and-wekaio", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-record-first-quarter-2020-financial-results", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-record-fourth-quarter-and-annual-2018-results-exceeded-1-billion-in-annual-revenue-in-2018", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-record-fourth-quarter-and-annual-2019-financial-results", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-record-revenue-for-the-first-quarter-of-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-record-revenue-for-the-second-quarter-of-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-record-revenue-for-the-third-quarter-of-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-delivers-spectrum-3-based-ethernet-switches-first-12-8-tbps-networking-platforms-optimized-for-cloud-storage-and-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-ethernet-and-infiniband-solutions-deliver-breakthrough-performance-for-amd-epycTM-7002-processor-based-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-hdr-200g-infiniband-accelerates-new-generation-of-world-wide-high-performance-computing-and-artificial-intelligence-supercomputers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-hdr-200g-infiniband-deep-learning-acceleration-engines-demonstrates-two-times-higher-performance-for-artificial-intelligence-ai-platforms-with-nvidia", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-introduces-advanced-network-telemetry-technology-to-keep-your-business-up-and-running", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-introduces-breakthrough-nvme-snapTM-technology-to-simplify-composable-storage", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-introduces-ethernet-cloud-fabric-technology-based-on-the-world-s-most-advanced-100200400gbe-open-ethernet-switches", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-introduces-mellanox-skywayTM-200-gigabit-hdr-infiniband-to-ethernet-gateway-appliance-for-high-performance-and-cloud-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-introduces-quantum-longreach-appliance-extending-100g-edr-and-200g-hdr-infiniband-connectivity-to-10-and-40-kilometers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-liquid-cooled-hdr-200g-multi-host-infiniband-adapters-accelerate-lenovo-s-most-advanced-liquid-cooled-server-platform", "metadata": {"callable": "web_scraper"}} 
+{"payload": "https://nvidianews.nvidia.com/news/mellanox-propels-jd-cloud-to-new-levels-of-performance-and-efficiency", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-propels-nvmetcp-and-roce-fabrics-to-new-heights", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-releases-independent-report-demonstrating-connectx-ethernet-nics-outperform-competition-and-ships-first-connectx-6-dx-secure-smartnics", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-showcases-live-system-demonstrations-of-linkxTM-200g-400g-cables-transceivers-at-ofc-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-technologies-names-doug-ahrens-as-chief-financial-officer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-acquire-world-leading-network-intelligence-technology-developer-titan-ic-to-strengthen-leadership-in-security-and-data-analytics", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-report-first-quarter-2019-financial-results-on-april-16-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-report-first-quarter-2020-financial-results", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-report-fourth-quarter-2018-financial-results-on-january-30-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-report-fourth-quarter-2019-financial-results-on-january-29-2020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-report-second-quarter-2019-financial-results-on-july-24-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mellanox-to-report-third-quarter-2019-financial-results-on-october-30-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/mercedes-benz-and-nvidia-to-build-software-defined-computing-architecture-for-automated-driving-across-future-fleet", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/microsoft-and-nvidia-announce-expansive-new-gaming-deal", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/minecraft-is-rtx-on-real-time-ray-tracing-comes-to-worlds-best-selling-video-game", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/minecraft-with-rtx-windows-beta-launch-brings-stunning-ray-traced-visuals-to-millions-of-gamers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/muvee-reveal-version-8-home-movie-making-software-is-up-to-8x-faster-with-cuda-enabled-nvidia-geforce-gpus-6623027", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nemo-retriever-generative-ai-microservice", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-geforce-gtx-1660-ti-delivers-great-performance-leap-for-every-gamer-starting-at-279", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-data-center-inference-platform-to-fuel-next-wave-of-ai-powered-services", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/new-nvidia-egx-edge-supercomputing-platform-accelerates-ai-iot-5g-at-the-edge", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-hyperscale-accelerators-boost-machine-learning-throughput-for-web-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-pascal-gpus-accelerate-deep-learning-inference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-quadro-400-empowers-designers-and-engineers-with-up-to-10-times-better-performance-6670224", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-quadro-graphics-solutions-bring-the-power-of-the-fermi-architecture-to-all-engineers-designers-and-animators-6622980", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-research-creates-interactive-worlds-with-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-rtx-gpus-power-next-generation-of-workstations-and-pcs-for-millions-of-artists-designers-engineers-and-virtual-desktop-users", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-nvidia-tegra-processor-powers-the-tablet-revolution", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-open-industry-standard-introduced-for-connecting-next-generation-vr-headsets-to-pcs-other-devices", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-tegra-zone-app-is-your-destination-for-the-best-mobile-games-optimized-for-tegra-6670241", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-tegra-zone-app-is-your-destination-for-the-best-mobile-games-optimized-for-tegra-6670243", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/new-tegra-zone-app-is-your-destination-for-the-best-mobile-games-optimized-for-tegra-6670244", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nio-partners-with-nvidia-to-develop-a-new-generation-of-automated-driving-electric-vehicles", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/notice-for-nvidia-tablet-customers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/ntt-adopting-nvidia-ai-platform-to-power-company-wide-artificial-intelligence-initiative", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nuance-nvidia-medical-imaging-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-3dtv-play-software-now-available-bringing-3d-games-3d-photos-and-blu-ray-3d-movies-to-3d-hdtvs-6622978", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-accelerated-supercomputers-hit-new-highs-on-top500-list", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-accelerates-apache-spark-worlds-leading-data-analytics-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-accelerates-neural-graphics-pc-gaming-revolution-at-gdc-with-new-dlss-3-pc-games-and-tools", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-ace-for-games-sparks-life-into-virtual-characters-with-generative-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-achieves-breakthroughs-in-language-understandingto-enable-real-time-conversational-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-achieves-record-number-of-design-wins-for-intel-sandy-bridge-pc-platform-6622966", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ada-lovelace-breaks-energy-efficiency-barrier-supercharges-laptop-designs", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-adds-gpu-acceleration-for-opencv-application-development-6622982", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-advances-ai-computing-revolution-with-new-volta-based-dgx-systems", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-advances-instant-ai-with-north-american-availability-of-base-command-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-advances-real-time-game-rendering-and-simulation-with-launch-of-nvidia-gameworks-sdk-3-1", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-aerial-5g-platform-extends-support-for-arm", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ai-delivers-major-advances-in-speech-recommender-system-and-hyperscale-inference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ai-enterprise-software-drives-new-wave-of-certified-systems-from-worlds-leading-manufacturers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ai-on-5g-computing-platform-adopted-by-leading-service-and-network-infrastructure-providers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ai-workbench-speeds-adoption-of-custom-generative-ai-for-worlds-enterprises", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ampere-architecture-powers-record-70+-new-geforce-rtx-laptops", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-american-college-of-radiology-ai-lab-team-to-accelerate-adoption-of-ai-in-diagnostic-radiology-across-thousands-of-hospitals", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-arm-partner-to-bring-deep-learning-to-billions-of-iot-devices", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-arrow-electronics-bring-new-jetson-xavier-ai-computer-to-worlds-largest-industrial-markets", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-audi-marry-silicon-valley-technology-with-german-engineering", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-aurora-collaborate-to-build-next-generation-autonomous-vehicle-compute-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-bethesda-add-wolfenstein-youngblood-to-list-of-ray-traced-blockbuster-franchises", "metadata": {"callable": "web_scraper"}} 
+{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-booz-allen-hamilton-expand-partnership-to-bring-ai-enabled-cybersecurity-to-public-and-private-sectors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-dell-technologies-deliver-new-data-center-solution-for-zero-trust-security-and-the-era-of-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-deloitte-to-bring-new-services-built-on-nvidia-ai-and-omniverse-platforms-to-the-worlds-enterprises", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-global-computer-makers-launch-industry-standard-enterprise-server-platforms-for-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-global-partners-launch-new-hgx-a100-systems-to-accelerate-industrial-ai-and-hpc", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-google-cloud-deliver-powerful-new-generative-ai-platform-built-on-the-new-l4-gpu-and-vertex-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-google-cloud-to-create-industrys-first-ai-on-5g-lab-to-speed-development-of-ai-everywhere", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-hugging-face-to-connect-millions-of-developers-to-generative-ai-supercomputing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-julich-supercomputing-centre-to-build-quantum-computing-lab", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-kings-college-london-accelerate-brain-research-with-synthetic-image-project", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-microsoft-accelerate-ai-together", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-microsoft-boost-ai-cloud-computing-with-launch-of-industry-standard-hyperscale-gpu-accelerator", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-microsoft-to-bring-the-industrial-metaverse-and-ai-to-hundreds-of-millions-of-enterprise-users-via-azure-cloud", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-national-taiwan-university-aim-to-unlock-the-secrets-of-the-universe-6623024", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-partners-build-out-universal-scene-description-to-accelerate-industrial-metaverse-and-next-wave-of-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-partners-collaborate-on-arm-computing-for-cloud-hpc-edge-pc", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-partners-form-industry-s-broadest-parallel-computing-development-ecosystem-6623031", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-samsung-agree-to-settle-all-outstanding-ip-litigation", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-softbank-group-announce-termination-of-nvidias-acquisition-of-arm-limited", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-and-square-enix-team-up-to-deliver-stunning-version-of-final-fantasy-xv-on-pc", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-tech-leaders-team-to-build-gpu-accelerated-arm-servers-for-new-era-of-diverse-hpc-architectures", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-tomtom-develop-mapping-system-for-self-driving-cars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-toyota-collaborate-to-accelerate-market-introduction-of-autonomous-cars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-toyota-research-institute-advanced-development-partner-to-create-safer-autonomous-transportation", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-university-of-illinois-join-forces-to-release-world-s-first-textbook-on-programming-massively-parallel-processors-2775359", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-university-of-illinois-join-forces-to-release-world-s-first-textbook-on-programming-massively-parallel-processors-6623019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-vmware-to-accelerate-machine-learning-data-science-and-ai-workloads-on-vmware-cloud-on-aws-accelerated-by-nvidia-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-vmware-to-deliver-new-rich-graphics-capabilities-for-desktop-virtualization", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-vsg-accelerate-oil-amp-gas-exploration", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-and-zenrin-collaborate-on-ai-powered-hd-mapping-for-japan", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-2011-ces-press-conference-live-webcast-coverage-6622964", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-2016-ces-press-conference-live-webcast-coverage", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-availability-of-jarvis-interactive-conversational-ai-framework", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-availability-of-jetson-agx-orin-developer-kit-to-advance-robotics-and-edge-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-cloud-ai-video-streaming-platform-to-better-connect-millions-working-and-studying-remotely", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-cpu-for-giant-ai-and-high-performance-computing-workloads", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-dgx-gh200-ai-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-dgx-h100-systems-worlds-most-advanced-enterprise-ai-infrastructure", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-digital-twin-platform-for-scientific-computing", "metadata": {"callable": 
"web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-extension-of-its-stock-repurchase-program-6623011", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2016", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2017", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2018", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2021", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2022", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2023", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2024", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2015", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2017", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2018", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2021", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2022", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-fourth-quarter-and-fiscal-2023", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2016", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2017", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2018", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2019", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2021", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2022", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2023", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-second-quarter-fiscal-2024", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-the-fourth-quarter-and-fiscal-2016", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2016", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2017", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2018", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2021", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2022", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2023", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-third-quarter-fiscal-2024", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-first-quarter-fiscal-2022-revenue-tracking-above-outlook", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-four-for-one-stock-split-pending-stockholder-approval-at-annual-meeting-set-for-june-3", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-gameworks-dx12", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-geforce-special-event-featuring-ceo-jensen-huang", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-global-world-of-tanks-open-tournament-6653904", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-gpu-technology-conference-for-2010-6623010", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-gtc-2020-keynote-with-ceo-jensen-huang-set-for-may-14", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-announces-hopper-architecture-the-next-generation-of-accelerated-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-hybrid-quantum-classical-computing-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-industrys-first-secure-smartnic-optimized-for-25g", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-instant-ai-infrastructure-for-enterprises", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-investor-day-for-financial-community", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-jetson-nano-99-tiny-yet-mighty-nvidia-cuda-x-ai-computer-that-runs-all-ai-models", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-jetson-xavier-nx-worlds-smallest-supercomputer-for-ai-at-the-edge", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-live-webcast-coverage-for-2010-ces-press-conference-6623023", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-major-release-of-omniverse-with-new-usd-connectors-and-tools-simulation-technologies-and-developer-frameworks", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-major-updates-to-triton-inference-server-as-25-000-companies-worldwide-deploy-nvidia-ai-inference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-mellanox-infiniband-for-exascale-ai-supercomputing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-new-ai-partners-courses-initiatives-to-deliver-deep-learning-training-worldwide", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-new-dgx-superpod-the-first-cloud-native-multi-tenant-supercomputer-opening-world-of-ai-to-enterprise", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-new-g-sync-esports-displays", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-new-system-for-accelerated-quantum-classical-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-omniverse-cloud-to-connect-tens-of-millions-of-designers-and-creators", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-omniverse-open-beta-letting-designers-collaborate-in-real-time-from-home-or-around-the-world", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-omniverse-replicator-synthetic-data-generation-engine-for-training-ais", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-ovx-computing-systems-the-graphics-and-simulation-foundation-for-the-metaverse-powered-by-ada-lovelace-gpu", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-partnership-with-gsks-ai-powered-lab-for-discovery-of-medicines-and-vaccines", "metadata": {"callable": "web_scraper"}} 
+{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-platform-for-creating-ai-avatars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-preliminary-financial-resultsfor-second-quarter-fiscal-2023", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-pricing-of-2-0-billion-notes-offering", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-quarterly-cash-dividend-timing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-ready-made-nvidia-dgx-superpods-offered-by-global-network-of-certified-partners", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-record-adoption-of-new-turing-t4-cloud-gpu", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-scalable-gpu-accelerated-supercomputer-in-the-microsoft-azure-cloud", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-schedule-of-upcoming-events-with-the-financial-community-6623015", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-schedule-of-upcoming-events-with-the-financial-community-6623028", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-spectrum-high-performance-data-center-networking-infrastructure-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-annual-investor-day-2860246", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-2928895", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-3116166", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-3298364", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-4602636", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-5180559", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6630932", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6660585", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6679817", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6713490", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6734266", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6743786", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6746525", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6772720", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6785893", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6786986", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6791199", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6805345", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6860006", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6865300", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6872456", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6893211", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-for-financial-community-6893775", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-event-with-the-financial-community-6622999", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-3026954", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-3102681", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-3164753", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-3771009", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-5756070", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6138003", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6653451", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6670210", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6690562", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6695360", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6730851", 
"metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6759570", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6783232", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6801304", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6810636", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6812512", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6821990", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6824408", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6833969", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6843293", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6848618", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6856614", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6857391", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6883412", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6885150", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6889219", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6892653", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6894934", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6897247", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-with-the-financial-community-6622970", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-with-the-financial-community-6622987", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-world-s-first-ai-computer-to-make-robotaxis-a-reality", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-announces-worlds-first-functionally-safe-ai-self-driving-platform", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-appoints-seasoned-it-exec-vishal-dhupar-as-managing-director-sales-and-marketing-for-south-asia-6622979", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-asml-tsmc-and-synopsys-set-foundation-for-next-generation-chip-manufacturing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-audi-partner-to-put-world-s-most-advanced-ai-car-on-road-by-2020", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-baidu-announce-partnership-to-accelerate-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-base-command-platform-provides-enterprises-with-fast-path-to-scale-production-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-boosts-capabilities-of-worlds-40-million-creativeswith-launch-of-nvidia-studio", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-boosts-iq-of-self-driving-cars-with-world-s-first-in-car-artificial-intelligence-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-boosts-worlds-leading-deep-learning-computing-platform-bringing-10x-performance-gain-in-six-months", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-brings-cuda-to-arm-enabling-new-path-to-exascale-supercomputing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-brings-generative-ai-to-worlds-enterprises-with-cloud-services-for-creating-large-language-and-visual-models", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-brings-large-language-ai-models-to-enterprises-worldwide", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-brings-millions-more-into-the-metaverse-with-expanded-omniverse-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-brings-power-of-geforce-gtx-to-gamers-everywhere-with-two-new-kepler-based-gpus-6653912", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-brings-rtx-4080-to-geforce-now", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-building-uks-most-powerful-supercomputer-dedicated-to-ai-research-in-healthcare", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-calls-uk-ai-strategy-important-step-will-open-cambridge-1-supercomputer-to-uk-healthcare-startups", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-cao-david-shannon-to-retire-at-year-s-end", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-captures-three-major-computex-awards-for-tesla-m40-jetson-tx1-shield-android-tv", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ceo-jensen-huang-to-host-ai-pioneers-yoshua-bengio-geoffrey-hinton-and-yann-lecun-and-others-at-gtc21", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ceo-jensen-huang-to-keynote-worlds-premier-ai-conference", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-ceo-jensen-huang-to-unveil-new-ai-technologies-products-in-gtc-keynote-hundreds-of-industry-and-ai-leaders-to-speak-at-november-event", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-chief-scientist-bill-dally-receives-lifetime-achievement-award-from-leading-japanese-tech-society", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-chosen-by-every-major-computer-maker-every-major-cloud", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-city-year-san-jose-strong-neighborhoods-initiative-team-up-to-renovate-mckinley-elementary-school-and-community-center-6623025", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-collaborates-with-activision-and-bungie-to-bring-destiny-2-to-pc", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-collaborates-with-microsoft-to-accelerate-enterprise-ready-generative-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-completes-acquisition-of-mellanox-creating-major-force-driving-next-gen-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-debuts-geforce-rtx-3060-family-for-the-holidays", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-delivers-greatest-ever-generational-leap-in-performance-with-geforce-rtx-30-series-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-delivers-massive-performance-leap-for-deep-learning-hpc-applications-with-nvidia-tesla-p100-accelerators", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-delivers-quantum-leap-in-performance-introduces-new-era-of-neural-rendering-with-geforce-rtx-40-series", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-demonstrates-streaming-3d-video-using-microsoft-silverlight-6623001", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-dgx-station-a100-offers-researchers-ai-data-center-in-a-box", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-doubles-down-announces-a100-80gb-gpu-supercharging-worlds-most-powerful-gpu-for-ai-supercomputing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-doubles-performance-for-deep-learning-training", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-drive-constellation-now-available-virtual-proving-ground-for-validating-autonomous-vehicles", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-earns-double-wins-with-computex-best-choice-awards", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-egx-edge-ai-platform-brings-real-time-ai-to-manufacturing-retail-telco-healthcare-and-other-industries", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-enables-era-of-interactive-conversational-ai-with-new-inference-software", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-enters-production-with-drive-orin-announces-byd-and-lucid-group-as-new-ev-customers-unveils-next-gen-drive-hyperion-av-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-cuda-developer-ecosystem-with-new-cuda-research-and-teaching-centers-in-the-u-s-canada-and-europe-6622983", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-early-access-program-for-vmwares-project-monterey-to-enable-secure-accelerated-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-geforce-gaming-to-millions-more-pcs-and-macs", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-its-deep-learning-inference-capabilities-for-hyperscale-datacenters", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-nvidia-clara-adds-global-healthcare-partners-to-take-on-covid-19", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-omniverse-cloud-to-power-industrial-digitalization", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-expands-reach-with-new-geforce-laptops-and-desktops-geforce-now-partners-and-omniverse-for-creators", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-extends-3d-leadership-at-ces", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-extends-3d-leadership-at-ces-6623022", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-extends-data-center-infrastructure-processing-roadmap-with-bluefield-3", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-external-gpus-bring-new-creative-power-to-millions-of-artists-and-designers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-fleet-command-scales-edge-ai-services-for-enterprises", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-geforce-gpus-create-the-ultimate-gaming-platform-with-windows-7-6623040", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-geforce-gtx-590-is-world-s-fastest-graphics-card-6670239", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-geforce-rtx-2060-is-here-next-gen-gaming-takes-off", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-geforce-rtx-4070-brings-power-of-ada-lovelace-architecture-and-dlss-3-to-millions-more-gamers-and-creators-starting-at-599", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-geforce-rtx-powers-record-number-of-new-gaming-laptops", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-global-data-center-system-manufacturers-to-supercharge-generative-ai-and-industrial-digitalization", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-global-workstation-manufacturers-to-launch-powerful-systems-for-generative-ai-and-llm-development-content-creation-data-science", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-gpu-cloud-now-available-to-hundreds-of-thousands-of-ai-researchers-using-nvidia-desktop-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gpu-plays-instrumental-role-in-discovery-of-new-pulsar-6670226", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gpus-empower-software-developers-to-bring-gpu-accelerated-applications-to-the-masses-with-windows-7-directcompute-6623041", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gpus-power-facebook-s-new-deep-learning-machine", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gpus-to-accelerate-microsoft-azure", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-grace-drives-wave-of-new-energy-efficient-arm-supercomputers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-grace-hopper-superchip-powers-jupiter-defining-a-new-class-of-supercomputers-to-propel-ai-for-scientific-discovery", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-grace-hopper-superchips-designed-for-accelerated-generative-ai-enter-full-production", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-grid-2-0-launches-with-broad-industry-support", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-grid-delivers-100-graphics-accelerated-virtual-desktops-per-server", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gtc-2022-to-feature-keynote-from-ceo-jensen-huang-new-products-900+-sessions-from-industry-and-ai-leaders", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gtc-2023-to-feature-latest-advances-in-ai-computing-systems-generative-ai-industrial-metaverse-robotics-keynote-by-jensen-huang-talks-by-openai-deepmind-founders", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gtc-news-to-be-shared-on-march-24-followed-by-investor-call", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-gtc-to-feature-ceo-jensen-huang-keynote-announcing-new-ai-and-metaverse-technologies-200+-sessions-with-top-tech-business-execs", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-hgx-2-gpu-accelerated-platform-gains-broad-adoption", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-hopper-gpus-expand-reach-as-demand-for-ai-grows", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-hopper-in-full-production", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-hpc-platform-hopper-quantum-2-worldwide-adoption", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ibm-and-toyota-keynotes-to-be-webcast-live-from-2016-gpu-technology-conference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-inference-breakthrough-makes-conversational-ai-smarter-more-interactive-from-cloud-to-edge", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-introduces-60+-updates-to-cuda-x-libraries-opening-new-science-and-industries-to-accelerated-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-dlss-3-with-breakthrough-ai-powered-frame-generation-for-up-to-4x-performance", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-drive-agx-orin-advanced-software-defined-platform-for-autonomous-machines", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-drive-autopilot-worlds-first-commercially-available-level-2+-automated-driving-system", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-drive-av-safety-force-field-computational-defensive-driving-policy-to-shield-autonomous-vehicles-from-collisions", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-drive-constellation-simulation-system-to-safely-drive-autonomous-vehicles-billions-of-miles-in-virtual-reality", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-geforce-rtx-3060-next-generation-of-the-worlds-most-popular-gpu", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-generative-ai-foundry-service-on-microsoft-azure-for-enterprises-and-startups-worldwide", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-grace-cpu-superchip", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-hgx-2-fusing-hpc-and-ai-computing-into-unified-architecture-6696445", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-new-breed-of-high-performance-workstations-for-millions-of-data-scientists", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-new-family-of-bluefield-dpus-to-bring-breakthrough-networking-storage-and-security-performance-to-every-data-center", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-nvs-business-graphics-solutions-delivering-top-visual-fidelity-across-up-to-eight-displays-6622968", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-rapids-open-source-gpu-acceleration-platform-for-large-scale-data-analytics-and-machine-learning", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-the-beastly-geforce-gtx-1080-ti-fastest-gaming-gpu-ever", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-introduces-the-next-generation-in-cloud-gaming", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-invests-in-computing-s-future-with-awards-to-top-phd-students-6623008", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-isaac-launches-new-era-of-autonomous-machines", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-jarvis-simplifies-building-state-of-the-art-conversational-ai-services", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-jetson-orin-nano-sets-new-standard-for-entry-level-edge-ai-and-robotics-with-80x-performance-leap", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-jetson-tx2-enables-ai-at-the-edge", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-laptop-momentum-accelerates-as-partners-announce-25-new-models", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-accelerated-ethernet-platform-for-hyperscale-generative-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-ai-cloud-container-registry-to-accelerate-deep-learning-volta-gpus-debut-on-amazon-web-services", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-ai-computing-platform-for-medical-devices-and-computational-sensing-systems", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-data-center-scale-omniverse-computing-system-for-industrial-digital-twins", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-dgx-cloud-giving-every-enterprise-instant-access-to-ai-supercomputer-from-a-browser", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-edge-computing-platform-to-bring-real-time-ai-to-global-industries", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-gpu-cloud-platform-to-simplify-ai-development", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-igx-edge-ai-computing-platform-for-safe-secure-autonomous-systems", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-inference-platforms-for-large-language-models-and-generative-ai-workloads", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-its-first-living-room-entertainment-device-2860453", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-large-language-model-cloud-services-to-advance-ai-and-digital-biology", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-magnum-io-software-suite-to-help-eliminate-data-bottlenecks-for-data-scientists-and-ai-hpc-researchers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-morpheus-to-bring-ai-driven-automation-to-cybersecurity-industry", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-new-research-training-and-certification-programs-for-developers-focused-on-gpu-computing-6623000", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-new-shield-tv-the-most-advanced-streamer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-omniverse-cloud-services-for-building-and-operating-industrial-metaverse-applications", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-omniverse-design-collaboration-and-simulation-platform-for-enterprises", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-launches-omniverse-for-developers-a-powerful-and-collaborative-game-creation-environment", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-revolutionary-volta-gpu-platform-fueling-next-era-of-ai-and-high-performance-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-search-for-next-great-gpu-research-projects-6623037", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-tegra-x1-mobile-super-chip", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-uks-most-powerful-supercomputer-for-research-in-ai-and-healthcare", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-launches-world-s-first-deep-learning-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-led-team-receives-25-million-contract-from-darpa-to-develop-high-performance-gpu-computing-systems-6622995", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-massachusetts-general-hospital-use-artificial-intelligence-to-advance-radiology-pathology-genomics", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-mgx-server-specification", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-microsoft-accelerate-cloud-enterprise-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-microsoft-epic-games-unity-and-leading-developers-kick-start-next-gen-gaming-at-gdc-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-20-new-cuda-research-and-training-centers-in-seven-nations-6622961", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-aarti-shah-to-board-of-directors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-donald-robertson-as-chief-accounting-officer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-georgia-institute-of-technology-a-cuda-center-of-excellence-6622990", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-john-dabiri-to-board-of-directors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-melissa-lora-to-board-of-directors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-michael-mccaffery-persis-drell-to-board-of-directors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-stephen-neal-to-board-of-directors", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-three-new-2010-cuda-fellows-6622976", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-tim-teter-as-general-counsel", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-top-5-startups-at-third-annual-emerging-companies-summit-6622981", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-names-university-of-maryland-a-cuda-center-of-excellence-6623017", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-names-university-of-tennessee-a-cuda-center-of-excellence-6623035", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-omniverse-opens-portals-to-vast-worlds-of-openusd", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-omniverse-scientific-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-opens-nvlink-for-custom-silicon-integration", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-optimus-and-3d-vision-notebooks-featuring-new-geforce-400m-series-gpus-arrive-for-the-holidays-6622986", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-optimus-technology-delivers-perfect-balance-of-notebook-performance-and-battery-life-6623016", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-parallel-nsight-delivers-gpu-computing-for-millions-of-microsoft-visual-studio-developers-6622998", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-partners-with-electronic-arts-to-bring-hit-games-to-geforce-now", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-partners-with-foxconn-to-build-factories-and-systemsfor-the-ai-industrial-revolution", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-partners-with-schrodinger-to-further-accelerate-drug-discovery-worldwide", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-partners-with-world-s-top-server-manufacturers-to-advance-ai-cloud-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-pascal-gpus-to-double-speed-of-europe-s-fastest-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-paves-path-to-ai-cities-with-metropolis-edge-to-cloud-platform-for-video-analytics", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-paves-way-for-tomorrow-s-cars-with-nvidia-drive-automotive-computers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-physx-technology-adopted-for-funcom-s-dreamworld-engine-6670238", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-powered-innovations-seize-center-stage-at-nab-2011-6670245", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-powered-innovations-seize-center-stage-at-nab-2011-6670246", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-powers-new-class-of-supercomputing-workstations-with-breakthrough-capabilities-for-design-and-engineering", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-powers-next-generation-supercomputer-at-university-of-edinburgh", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-powers-the-world-s-top-13-most-energy-efficient-supercomputers", "metadata": {"callable": "web_scraper"}} 
+{"payload": "https://nvidianews.nvidia.com/news/nvidia-provides-business-update-for-second-quarter-of-fiscal-2011-6622997", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-provides-transportation-industry-access-to-its-deep-neural-networks-for-autonomous-vehicles", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-provides-u-s-postal-service-ai-technology-to-improve-delivery-service", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-quadro-rtx-6000-powers-worlds-fastest-laptop-asus-proart-studiobook-one", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-quadro-virtual-data-center-workstation-software-turns-tesla-gpu-servers-into-powerful-workstations", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-quantum-2-takes-supercomputing-to-new-heights-into-the-cloud", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-receives-approval-to-proceed-with-mellanox-acquisition-from-chinas-antitrust-authority", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-recognizes-chinese-academy-of-sciences-and-tsinghua-university-as-cuda-centers-of-excellence-6623038", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-recommends-stockholders-reject-mini-tender-offer-by-tutanota-llc", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-redefines-workstations-to-power-new-era-of-ai-design-industrial-metaverse", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reinvents-computer-graphics-with-turing-architecture", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reinvents-the-workstation-with-real-time-ray-tracing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-releases-jetson-xavier-nx-developer-kit-with-cloud-native-support", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-releases-major-omniverse-upgrade-with-generative-ai-and-openusd", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reports-financial-results-for-first-quarter-fiscal-2011-6623005", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reports-financial-results-for-fourth-quarter-and-fiscal-year-2010-6623013", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reports-financial-results-for-second-quarter-fiscal-2011-6622994", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reports-financial-results-for-third-quarter-fiscal-2011-6622975", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reports-financial-results-for-third-quarter-fiscal-year-2010-6623034", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-responds-to-trc-capitals-mini-tender-offer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-reveals-the-titan-of-turing-titan-rtx", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-rolls-royce-and-classiq-announce-quantum-computing-breakthrough-for-computational-fluid-dynamics-in-jet-engines", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-extends-reach-across-top-applications-bringing-ray-tracing-ai-to-millions-of-creatives", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-on-new-wave-of-blockbuster-games-showcase-ray-tracing-at-gamescom", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-platform-brings-real-time-ray-tracing-and-ai-to-barrage-of-blockbuster-games", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-ray-tracing-accelerated-applications-available-to-millions-of-3d-artists-and-designers-this-year", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-studio-laptops-and-mobile-workstations-purpose-built-for-creators-coming-from-every-major-oem", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-technology-realizes-dream-of-real-time-cinematic-rendering", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-rtx-titles-jump-to-130-on-widespread-industry-adoption-of-ray-tracing-nvidia-dlss", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-s-deep-learning-car-computer-selected-by-volvo-on-journey-toward-a-crash-free-future", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-ai-inference-records-introduces-a30-and-a10-gpus-for-enterprise-servers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-2899736", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-3666930", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-5996161", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6623006", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6688051", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6756473", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6799866", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6840951", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6871430", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-first-quarter-financial-results-6891941", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-2831582", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-3136856", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-5696259", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6623018", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6669253", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6789410", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6832142", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6863587", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6887816", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-fourth-quarter-financial-results-6899475", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-2983780", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-4331681", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6622362", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6622996", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6706694", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6769669", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6809190", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6847669", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-second-quarter-financial-results-6894429", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-3095349", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-5014651", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6622977", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6648163", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6727445", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6781364", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6819118", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6855118", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6882432", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-conference-call-for-third-quarter-financial-results-6896799", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-new-standard-for-workstation-performance-and-reliability-6653906", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sets-path-for-future-of-edge-ai-and-autonomous-machines-with-new-jetson-agx-orin-robotics-computer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-shakes-up-sub-200-graphics-market-with-new-geforce-gtx-650-ti-boost-gpu-6653903", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-shareholder-meeting-set-for-may-19-stockholders-can-participate-online", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ships-worlds-most-advanced-ai-system-nvidia-dgx-a100-to-fight-covid-19-third-generation-dgx-packs-record-5-petaflops-of-ai-performance", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-skips-holiday-party-and-donates-250-000-to-two-local-non-profits-1-000-employees-volunteers-to-build-urban-garden-restore-park", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sli-multi-gpu-technology-paired-with-intel-core-i7-extreme-edition-processors-power-world-s-fastest-desktop-gaming-platforms-6653916", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-sli-multi-gpu-technology-paired-with-intel-core-i7-extreme-edition-processors-power-world-s-fastest-desktop-gaming-platforms-6653917", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-smashes-performance-records-on-ai-inference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-june-2-individuals-can-participate-online", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-june-22-individuals-can-participate-online", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-june-9-individuals-can-participate-online", 
"metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-may-16-individuals-can-participate-online", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-may-20;-individuals-can-participate-online", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-may-22-individuals-can-participate-online", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-stockholder-meeting-set-for-may-23-individuals-can-participate-online-6159480", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-supercharges-deep-learning-innovation-with-program-to-support-ai-startups-3949580", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-supercharges-hopper-the-worlds-leading-ai-computing-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-supercharges-record-80-gaming-laptop-models-with-turing-powered-gtx-16-series-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-supercharges-rendering-performance-with-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-supersizes-pc-gaming-with-new-breed-of-big-format-gaming-displays", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-taiwans-ministry-of-science-and-technology-to-accelerate-taiwan-ai-revolution-with-nvidia-ai-computing-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-taiwans-most-unveil-collaboration-to-supercharge-ai-efforts", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-teams-with-amazon-web-services-to-bring-ai-to-millions-of-connected-devices", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-teams-with-national-cancer-institute-u-s-department-of-energy-to-create-ai-platform-for-accelerating-cancer-research", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-tensor-core-gpus-accelerate-worlds-fastest-supercomputers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-tensorrt-3-dramatically-accelerates-ai-inference-for-hyperscale-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-tesla-gpus-enable-shorter-design-cycles-improved-product-quality-using-acusolve-6623002", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-tesla-gpus-to-communicate-faster-over-mellanox-infiniband-networks-6623029", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-tesla-p100-supercharges-hpc-applications-by-more-than-30x", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-titan-v-transforms-the-pc-into-ai-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-acquire-arm-for-40-billion-creating-worlds-premier-computing-company-for-the-age-of-ai", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-to-acquire-mellanox-for-6-9-billion", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-announce-third-quarter-financial-results-in-conference-call-6623042", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-host-digital-gtc-in-october-featuring-keynote-from-ceo-jensen-huang-and-continuous-programming-around-the-world", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-host-financial-analyst-day-6623014", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-host-world-s-premier-ai-conference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-host-worlds-top-ai-experts-at-gpu-technology-conference", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-provide-cfo-commentary-prior-to-quarterly-earnings-call-6623036", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-train-100-000-developers-on-deep-learning-in-2017", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-unveil-latest-accelerated-computing-breakthroughs-in-virtual-special-address-during-ces", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-to-wind-down-icera-modem-operations", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-transforms-mainstream-laptops-into-gaming-powerhouses-with-geforce-rtx-30-series", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-turbocharges-extreme-scale-ai-for-argonne-national-laboratorys-polaris-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-turing-t4-cloud-gpu-adoption-accelerates", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-untethers-gaming-with-project-shield-6653909", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-untethers-gaming-with-project-shield-6653910", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-ai-enterprise-software-suite-to-help-every-industry-unlock-the-power-of-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-ai-platform-to-minimize-downtime-in-supercomputing-data-centers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-drive-thor-centralized-car-computer-unifying-cluster-infotainment-automated-driving-and-parking-in-a-single-cost-saving-system", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-first-online-3d-vision-community-6622958", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-jetson-nano-2gb-the-ultimate-ai-and-robotics-starter-kit-for-students-educators-robotics-hobbyists", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-large-language-models-and-generative-ai-services-to-advance-life-sciences-r-d", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/nvidia-unveils-new-weapon-of-choice-for-gamers-the-nvidia-geforce-gtx-660-ti-gpu-6653914", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-new-weapon-of-choice-for-gamers-the-nvidia-geforce-gtx-660-ti-gpu-6653915", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-nvidia-drive-atlan-an-ai-data-center-on-wheels-fornext-gen-autonomous-vehicles", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-palm-sized-energy-efficient-ai-computer-for-self-driving-cars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-unveils-quadro-rtx-worlds-first-ray-tracing-gpu", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-updates-financial-guidance-for-fourth-quarter-of-fiscal-year-2019", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-ushers-in-new-era-of-robotics-with-breakthroughs-making-it-easier-to-build-and-train-intelligent-machines", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-verizon-wireless-demonstrate-full-hd-internet-tablet-for-4g-wireless-network", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-vr-platforms-deliver-massive-performance-boost-for-virtual-reality", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-wins-new-ai-inference-benchmarks", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-wins-quartet-of-major-awards-at-computex", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-with-microsoft-announces-technology-collaboration-for-era-of-intelligent-edge", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidia-zf-and-baidu-launch-industrys-first-ai-autonomous-vehicle-computer-for-china", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidias-new-ada-lovelace-rtx-gpu-arrives-for-designers-and-creators", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidias-new-ampere-data-center-gpu-in-full-production", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/nvidias-new-ethernet-networking-platform-for-ai-available-soon-from-dell-technologies-hewlett-packard-enterprise-lenovo", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-and-mubadala-sign-interim-agreement-for-bahrain-field-development", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-and-mubadala-sign-development-and-production-sharing-agreement-for-the-bahrain-field-development", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-announces-acquisition-of-phibro", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-announces-dividend", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-announces-dividend-increase", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/occidental-petroleum-announces-first-quarter-net-income", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-announces-net-income-for-second-quarter-and-first-six-months-of-2009", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-announces-significant-california-oil-and-gas-discovery", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-announces-third-quarter-net-income", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-declares-dividend", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-mubadala-and-noga-form-joint-operating-company-in-bahrain-to-further-develop-the-bahrain-field", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-to-hold-earnings-conference-call", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-to-hold-earnings-conference-call-6833882", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-petroleum-to-hold-earnings-conference-call-6833883", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/occidental-replaces-210-percent-of-2008-production", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/openstack-software-adds-native-upstream-support-for-hdr-200-gigabit-infiniband-for-building-high-performance-clouds", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/oracle-and-nvidia-bring-the-power-of-the-cloud-to-the-next-generation-of-analytics-machine-learning-and-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/oracle-cloud-infrastructure-chooses-nvidia-bluefield-data-center-acceleration-platform", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/oracle-nvidia-speed-ai-adoption", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/pgi-accelerator-compilers-for-power-architecture-enable-easy-on-ramp-to-gpu-acceleration-with-power8-and-nvidia-nvlink", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/pixar-animation-studios-licenses-nvidia-technology-for-accelerating-feature-film-production", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/quake-ii-rtx-nvidias-reimagining-of-pc-gaming-classic-with-ray-traced-graphics-available-now", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/r-casey-olson-executive-vice-president-and-president-oxy-oil-and-gas-international-announces-retirement", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/rambus-and-nvidia-sign-patent-license-agreement-6622993", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/record-2008-performance-and-growth-highlighted-at-oxy-s-annual-stockholders-meeting", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/red-hat-and-nvidia-team-to-bring-high-performance-software-defined-5g-ran-to-telecom-industry", "metadata": {"callable": 
"web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/releases-20210113", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/releases-20210113-6829466", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/releases-20210113-6829467", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/releases-20210113-6829468", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/releases-20210113-6829469", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/releases-20210113-6829471", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/reliance-and-nvidia-partner-to-advance-ai-in-india-for-india", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/relive-a-classic-nvidia-remakes-quake-ii-with-stunning-ray-traced-graphics-gifts-to-pc-gamers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/revolutionary-mellanox-connectx-6-dx-smartnics-and-bluefield-2-io-processing-units-transform-cloud-and-data-center-security", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/servicenow-and-nvidia-announce-partnership-to-build-generative-ai-across-enterprise-it", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/servicenow-nvidia-accenture-ai-lighthouse", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/shutterstock-teams-with-nvidia-to-build-ai-foundation-models-for-generative-3d-artist-tools", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/siemens-and-nvidia-to-enable-industrial-metaverse", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/sixty-startups-from-around-the-world-to-showcase-tomorrow-s-technologies-at-nvidia-s-emerging-companies-summit-6622985", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/snowflake-and-nvidia-team-to-help-businesses-harness-their-data-for-generative-ai-in-the-data-cloud", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/softbank-telecom-data-centers-grace-hopper", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/swiss-national-supercomputing-centre-hewlett-packard-enterprise-and-nvidia-announce-worlds-most-powerful-ai-capable-supercomputer", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/taiwans-tech-titans-adopt-worlds-first-nvidia-grace-cpu-powered-system-designs", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/tata-partners-with-nvidia-to-build-large-scale-ai-infrastructure", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/tencent-cloud-adopts-nvidia-tesla-for-ai-cloud-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/tencent-games-partners-with-nvidia-to-launch-start-cloud-gaming-service", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/tesla-bio-workbench-enables-scientists-to-achieve-new-breakthroughs-in-biosciences", "metadata": {"callable": "web_scraper"}} +{"payload": 
"https://nvidianews.nvidia.com/news/the-broad-institute-and-nvidia-bring-nvidia-clara-to-terra-cloud-platform-serving-25-000-researchers-advancing-biomedical-discovery", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/three-of-the-world-s-top-five-supercomputers-powered-by-nvidia-tesla-gpus-6622973", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/tiny-nvidia-supercomputer-to-bring-artificial-intelligence-to-new-generation-of-autonomous-robots-and-drones", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/tokyo-institute-of-technology-selected-as-japan-s-first-cuda-center-of-excellence-6623009", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/top-global-systems-makers-accelerate-adoption-of-nvidia-grace-and-grace-hopper", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/top-international-cloud-gaming-pioneers-standardize-on-nvidia-grid-platform-6653911", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/top-international-cloud-gaming-pioneers-standardize-on-nvidia-grid-platform-6653913", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/transportation-supplier-zf-and-nvidia-announce-ai-based-self-driving-system-for-cars-trucks-commercial-vehicles", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/u-s-patent-office-rejects-all-17-claims-in-three-rambus-patents-asserted-against-nvidia-in-international-trade-commission-action-6623026", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/uber-selects-nvidia-technology-to-power-its-self-driving-fleets", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/update-nvidia-announces-upcoming-event-for-financial-community", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/update-nvidia-to-provide-cfo-commentary-prior-to-quarterly-earnings-call", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/virtual-assistants-and-digital-humans-on-pace-to-ace-turing-test-with-new-nvidia-omniverse-avatar-cloud-engine", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/virtual-reality-ecosystem-embraces-nvidia-vrworks-making-it-the-gold-standard-for-developers", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/vmware-and-nvidia-to-enable-next-gen-hybrid-cloud-architecture-and-bring-ai-to-every-enterprise", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/vmware-and-nvidia-unlock-generative-ai-for-enterprises", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/volkswagen-and-nvidia-to-infuse-ai-into-future-vehicle-lineup", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/volvo-cars-and-autoliv-select-nvidia-drive-px-platform-for-self-driving-cars", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/volvo-cars-zoox-saic-and-more-join-growing-range-of-autonomous-vehicle-makers-using-new-nvidia-drive-solutions", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/volvo-selects-nvidia-drive-for-production-cars", 
"metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/watch-dogs-legion-to-support-directx-raytracing-on-pc-powered-by-nvidia-geforce-rtx", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/wave-of-ev-makers-choose-nvidia-drive-for-automated-driving", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/welcome-to-the-holodeck-nvidia-s-design-lab-of-the-future", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/will-your-next-shampoo-be-developed-on-gpus", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/will-your-next-shampoo-be-developed-on-gpus-6623021", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/winner-winner-chicken-dinner-nvidia-adds-new-technology-to-smash-hit-game-playerunknown-s-battlegrounds", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/with-great-power-comes-great-gaming-nvidia-launches-geforce-rtx-super-series", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/work-play-create-with-record-100+-new-nvidia-geforce-powered-laptops", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/world-s-largest-server-companies-announce-nvidia-volta-systems-supercharged-for-ai", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/worlds-leading-electronics-manufacturers-adopt-nvidia-generative-ai-and-omniverse-to-digitalize-state-of-the-art-factories", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/worlds-top-system-makers-unveil-nvidia-a100-powered-servers-to-accelerate-ai-data-science-and-scientific-computing", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/wpp-partners-with-nvidia-to-build-generative-ai-enabled-content-engine-for-digital-advertising", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/yamaha-motor-adopts-jetson-agx-xavier-for-autonomous-machines-for-land-air-and-sea", "metadata": {"callable": "web_scraper"}} +{"payload": "https://nvidianews.nvidia.com/news/zf-hella-nvidia-partner-to-increase-safety-of-self-driving-vehicles", "metadata": {"callable": "web_scraper"}} diff --git a/experimental/streaming_ingest_rag/producer/src/generate_dataset.py b/experimental/streaming_ingest_rag/producer/src/generate_dataset.py new file mode 100644 index 000000000..328121322 --- /dev/null +++ b/experimental/streaming_ingest_rag/producer/src/generate_dataset.py @@ -0,0 +1,104 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import zipfile +import io +import argparse +import json + +from pathlib import Path + +import jsonlines +import fitz + +FILE_HOME = Path(__file__).parent + + +def extract_archive(archive_path, extract_path="pdf_dataset"): + """ + Extract zip archive. + + Parameters + ---------- + archive_path: pathlib.Path + Path to archive. + + extract_path: pathlib.Path + Path to extract archive. + """ + + out_path = archive_path.parent.joinpath("dataset") + + with zipfile.ZipFile(archive_path, 'r') as zip_ref: + zip_ref.extractall(out_path) + + +def extract_text(pdf_stream): + """ + Use PyMuPDF to extract text from a bytestream PDF. + + Parameters + ---------- + pdf_stream : io.BytesIO + A bytestream PDF. + + Returns + ------- + str + A string of extracted text. + """ + + with fitz.open(stream=pdf_stream, filetype="pdf") as doc: + text_list = [page.get_text() for page in doc] + + text = "".join(text_list).replace('+', ' ') + text = text.encode("ascii", errors="ignore").decode() + + return text + + +def main(): + + """Generate jsonl dataset from compressed zip archive.""" + + # Extract archive + archive_path = FILE_HOME.joinpath("data", "dataset.zip") + extracted_pdfs_path = FILE_HOME.joinpath("data", "dataset") + extract_archive(archive_path, extract_path=extracted_pdfs_path) + + # Get all pdf paths + paths = extracted_pdfs_path.glob('**/*') + pdf_files = [x for x in paths if x.is_file()] + + # Generate jsonl dataset + jsonl_output = extracted_pdfs_path.parent.joinpath("raw_sample.jsonl") + + with jsonlines.open(jsonl_output, mode='w') as jsonl_writer: + + for pdf_file in pdf_files: + + with open(pdf_file, "rb") as f: + pdf_stream = io.BytesIO(f.read()) + extracted_text = extract_text(pdf_stream) + + extraction = { + "payload": extracted_text, + "metadata": {"callable": "raw_chunker"} + } + + jsonl_writer.write(extraction) + + +if __name__ == "__main__": + main() diff --git a/experimental/streaming_ingest_rag/producer/src/producer.py b/experimental/streaming_ingest_rag/producer/src/producer.py new file mode 100644 index 000000000..41662f266 --- /dev/null +++ b/experimental/streaming_ingest_rag/producer/src/producer.py @@ -0,0 +1,229 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import time
+import json
+import argparse
+import os
+
+from abc import ABC, abstractmethod
+
+import jsonlines
+
+from confluent_kafka.admin import AdminClient
+from confluent_kafka.admin import NewTopic
+from confluent_kafka import Producer
+
+N_KAFKA_TOPIC_PARTITIONS = os.environ['N_KAFKA_TOPIC_PARTITIONS']
+
+class Publisher(ABC):
+
+    def __init__(
+            self,
+            bootstrap_servers='kafka:19092',
+            client_id='publisher'):
+
+        self._conf = {
+            'bootstrap.servers': bootstrap_servers,
+            'client.id': client_id}
+
+        self._kafka_admin = AdminClient(self._conf)
+
+    def _check_topic(self, topic):
+        topics = self._kafka_admin.list_topics().topics
+        if not topics.get(topic):
+            return False
+        return True
+
+    def create_topic(self, topic):
+
+        if not self._check_topic(topic):
+
+            topic_list = [NewTopic(
+                topic,
+                num_partitions=int(N_KAFKA_TOPIC_PARTITIONS),
+                replication_factor=1)]
+
+            self._kafka_admin.create_topics(topic_list)
+
+            while(True):
+                if not self._check_topic(topic):
+                    time.sleep(1.0)
+                    continue
+                break
+
+        print(f"Topic Created: {topic}")
+        print(f"Available Topics: \n {self._kafka_admin.list_topics().topics}")
+
+    def delete_topic(self, topic):
+
+        self._kafka_admin.delete_topics([topic])
+
+        while(True):
+            if self._check_topic(topic):
+                time.sleep(1.0)
+                continue
+            break
+
+        print(f"Topic Deleted: {topic}")
+        print(f"Available Topics: \n {self._kafka_admin.list_topics().topics}")
+
+    def publish_infinit(self, messages, topic, interval=1.0, infinit=False):
+
+        producer = Producer(self._conf)
+
+        while(True):
+            for msg in messages:
+
+                curr_time = time.time()
+
+                producer.produce(
+                    topic,
+                    json.dumps(msg),
+                    callback=Publisher.acked)
+
+                poll = producer.poll(1)
+                end_time = time.time()
+                # messages are sent every `interval` seconds
+                time_delta = end_time - curr_time
+                time.sleep(min(max(0, interval - time_delta), interval))
+
+            producer.flush()
+
+    def publish_batch(self, messages, topic, interval=0.01, n_messages=1000):
+
+        producer = Producer(self._conf)
+        ctr = 0
+        while(True):
+            for msg in messages:
+
+                if ctr >= n_messages:
+                    producer.flush()
+
+                    return
+
+                curr_time = time.time()
+
+                producer.produce(
+                    topic,
+                    json.dumps(msg),
+                    callback=Publisher.acked)
+
+                poll = producer.poll(1)
+                end_time = time.time()
+                # messages are sent every `interval` seconds
+                time_delta = end_time - curr_time
+                time.sleep(min(max(0, interval - time_delta), interval))
+                ctr += 1
+
+    def publish_single(self, message, topic):
+
+        producer = Producer(self._conf)
+
+        producer.produce(
+            topic,
+            json.dumps(message),
+            callback=Publisher.acked)
+
+        poll = producer.poll(0)
+        producer.flush()
+
+
+    @staticmethod
+    def acked(err, msg):
+
+        if err is not None:
+            print("Failed to deliver message: %s: %s" % (str(msg), str(err)))
+
+        else:
+            print("Message produced: %s" % (str(msg)))
+
+def load_jsonl(fpath):
+    jsonl_list = []
+    with jsonlines.open(fpath) as f:
+        for line in f:
+            jsonl_list.append(line)
+    return jsonl_list
+
+
+def main(args):
+
+    # load work queue
+    work_queue = load_jsonl(args.filepath)
+
+    # initialize publisher
+    publisher = Publisher(
+        bootstrap_servers=args.bootstrap_servers,
+        client_id=args.client_id)
+
+    # publish messages
+    publisher.create_topic(args.topic)
+
+    if not args.loop:
+        publisher.publish_batch(
+            messages=work_queue,
+            topic=args.topic,
+            interval=args.interval,
+            n_messages=args.n_messages)
+
+    else:
+        publisher.publish_infinit(
+            messages=work_queue,
+            topic=args.topic,
+            interval=args.interval)
+
+    # delete topic
+    if args.delete_topic:
+        publisher.delete_topic(args.topic)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-f", "--filepath", type=str,
+                        required=False, default="data/raw_sample.jsonl",
+                        help="Path to work queue jsonl file. (Default value: data/raw_sample.jsonl)")
+
+    parser.add_argument("-b", "--bootstrap-servers", type=str,
+                        required=False, default='kafka:19092',
+                        help="Kafka broker host:port. (Default value: kafka:19092)")
+
+    parser.add_argument("-t", "--topic", type=str,
+                        required=False, default='work_queue',
+                        help="Kafka topic used to publish work. (Default value: work_queue)")
+
+    parser.add_argument("-n", "--n-messages", type=int,
+                        required=False, default=1000,
+                        help="Total messages to produce. (Default value: 1000)")
+
+    parser.add_argument("-i", "--interval", type=float,
+                        required=False, default=0.001,
+                        help="Interval in seconds between published messages. (Default value: 0.001)")
+
+    parser.add_argument("-l", "--loop",
+                        action='store_true', default=False,
+                        help="Flag to continuously produce messages. (Default value: False)")
+
+    parser.add_argument("-c", "--client-id", type=str,
+                        required=False, default='publisher',
+                        help="Client ID for the producer. (Default value: publisher)")
+
+    parser.add_argument("-d", "--delete-topic",
+                        action='store_true', default=False,
+                        help="Flag to delete topic after producing completes. (Default value: False)")
+
+    args = parser.parse_args()
+
+    main(args)
diff --git a/experimental/streaming_ingest_rag/utils/produce_messages.sh b/experimental/streaming_ingest_rag/utils/produce_messages.sh
new file mode 100755
index 000000000..45cd1d300
--- /dev/null
+++ b/experimental/streaming_ingest_rag/utils/produce_messages.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+helpFunction()
+{
+   echo ""
+   echo "usage: $0 [-s SOURCE_TYPE] [-n N_MESSAGES]"
+   echo "options:"
+   echo -e "  -h              Show this help message and exit."
+   echo -e "  -s SOURCE_TYPE  Source type to generate (url, raw, or both)"
+   echo -e "  -n N_MESSAGES   Number of messages to publish to Kafka. (Default value: 1000)"
+
+   exit 1 # Exit script after printing help
+}
+
+while getopts "hs:n:" opt
+do
+   case "$opt" in
+      s ) source_type="$OPTARG" ;;
+      n ) n_messages="$OPTARG" ;;
+      h ) helpFunction ;; # Print helpFunction when help is requested
+      ? ) helpFunction ;; # Print helpFunction in case of an unknown parameter
+   esac
+done
+
+# Print helpFunction in case parameters are empty
+if [ -z "$source_type" ] || [ -z "$n_messages" ]
+then
+   echo "Some or all of the parameters are empty";
+   helpFunction
+fi
+
+if [ "url" == "$source_type" ]
+then
+  docker exec -it producer bash -c "
+    python3 producer.py \
+      --filepath data/url_sample.jsonl \
+      --topic "scrape_queue" \
+      --n-messages $n_messages \
+    && echo Producing Complete!"
+fi + +if [ "raw" == "$source_type" ] +then + docker exec -it producer bash -c " + python3 producer.py \ + --filepath data/raw_sample.jsonl \ + --topic "raw_queue" \ + --n-messages $n_messages \ + && echo Producing Complete!" +fi + +if [ "both" == "$source_type" ] +then + docker exec -it producer bash -c " + python3 producer.py \ + --filepath data/raw_sample.jsonl \ + --topic "raw_queue" \ + --n-messages $n_messages & \ + python3 producer.py \ + --filepath data/url_sample.jsonl \ + --topic "scrape_queue" \ + --n-messages $n_messages & + wait \ + && echo Producing Complete!" +fi diff --git a/integrations/langchain/embeddings/nemo_embed.py b/integrations/langchain/embeddings/nemo_embed.py new file mode 100644 index 000000000..a8c0320b5 --- /dev/null +++ b/integrations/langchain/embeddings/nemo_embed.py @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Nemo Embedding Microservice""" + +import requests +import json +import logging +from typing import Any, List, Sequence, Optional + +from langchain.pydantic_v1 import BaseModel +from langchain.schema.embeddings import Embeddings + +logger = logging.getLogger(__name__) + +class NemoEmbeddings(BaseModel, Embeddings): + """A custom Langchain Embedding class that integrates with Nemo Embedding MS + + Arguments: + server_url: (str) The URL of the Nemo Embedding MS to use. + model_name: (str) The name of the Nemo Embedding MS model to use. 
+    """
+    server_url: str = "http://localhost:9080/v1/embeddings"
+    model_name: str = "NV-Embed-QA-003"
+
+    def __init__(self, *args: Sequence, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+
+    def _embed(
+        self,
+        query: Optional[str] = "",
+        input_type: Optional[str] = "query",
+        request_timeout: Optional[int] = 5,
+        **kwargs,
+    ) -> List[float]:
+        """Get embeddings from the Nemo Embedding MS using its REST API."""
+
+        headers = {"accept": "application/json", "Content-Type": "application/json"}
+        data = {}
+        if query:
+            data["input"] = query
+
+        if not data.get("input"):
+            logger.warning("Valid query/passage not found in request")
+            return []
+
+        if self.model_name:
+            data["model"] = self.model_name
+
+        if input_type:
+            data["input_type"] = input_type
+
+        data["encoding_format"] = "float"
+        data["truncate"] = "END"
+
+        response = None
+        request_timeout = int(request_timeout)
+
+        if self.server_url is None:
+            logger.warning(
+                "Nemo Embedding Microservice URL not provided"
+            )
+            return []
+
+        try:
+            response = requests.post(self.server_url, headers=headers, data=json.dumps(data), timeout=request_timeout)
+            response.raise_for_status()
+        except requests.exceptions.Timeout:
+            logger.warning("HTTP request to Nemo Embedding Microservice timed out.")
+        except requests.exceptions.RequestException as e:
+            logger.warning(f"An error occurred in the HTTP request to the Nemo Embedding Microservice endpoint: {str(e)}")
+
+        if response and response.json():
+            response_data = response.json().get("data", {})
+            if len(response_data):
+                return response_data[0].get("embedding", [])
+            else:
+                return []
+
+        else:
+            logger.warning(f"Invalid or empty response returned by the Nemo Embedding Microservice endpoint {response}")
+            return []
+
+    def embed_query(self, text: str) -> List[float]:
+        """Input pathway for query embeddings."""
+        return self._embed(query=text, input_type="query")
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Input pathway for document embeddings."""
+        return [self._embed(query=text, input_type="passage") for text in texts]
\ No newline at end of file
diff --git a/integrations/langchain/llms/nemo_infer.py b/integrations/langchain/llms/nemo_infer.py
new file mode 100644
index 000000000..1ef4a0484
--- /dev/null
+++ b/integrations/langchain/llms/nemo_infer.py
@@ -0,0 +1,157 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional
+
+import requests
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+
+
+class NemoInfer(LLM):
+    """A custom Langchain LLM class that integrates with NemoInfer MS.
+
+    Arguments:
+    server_url: (str) The URL of the NemoInfer MS to use.
+    model: (str) The name of the NemoInfer MS model to use.
+ temperature: (str) Temperature to use for sampling + top_p: (float) The top-p value to use for sampling + stop: (List[str]) The words indicate stop generation of response + frequency_penalty: (float): penalty to each token that appears more frequently + streaming: (bool): Stream response + tokens: (int) The maximum number of tokens to generate. + """ + model: str = "llama" + temperature: float = 1 + stop: Optional[List[str]] = ["", ""] + n: Optional[int] = 1 + top_p: Optional[float] = 0.01 + frequency_penalty: Optional[float] = 0 + server_url: Optional[str] = "http://localhost:9999" + streaming: Optional[bool] = True + tokens: Optional[int] = 50 # This corresponds with max_tokens in openai schema + + @property + def _llm_type(self) -> str: + return "NemoInfer" + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling NemoInfer MS API.""" + + normal_params: Dict[str, Any] = { + "frequency_penalty": self.frequency_penalty, + "n": self.n, + "model": self.model, + "max_tokens": self.tokens, + "stream": self.streaming + } + + # Either temperature or top_p should be set not both + if self.temperature: + normal_params["temperature"] = self.temperature + elif self.top_p: + normal_params["top_p"] = self.top_p + + return {**normal_params} + + def _stream_response_to_generation_chunk(self, chunk): + """parse json response from nemo ms api + """ + try: + chunk = json.loads(chunk) + chunk = chunk.get("choices", [{}])[0].get("text", "") + return chunk + except Exception as e: + return "" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """ + Execute an inference request. + + Args: + prompt: The prompt to pass into the model. 
+            stop: A list of strings that stop generation when encountered
+
+        Returns:
+            The string generated by the model
+        """
+
+        text_callback = None
+        # Register text_callback for streaming response
+        if run_manager:
+            text_callback = partial(run_manager.on_llm_new_token, verbose=self.verbose)
+
+        if stop is None:
+            stop = self.stop
+
+        # Request to Nemo Infer MS
+        data = {"prompt": prompt, "stop": stop, **self._default_params}
+        # Nemo MS uses max_tokens instead of tokens
+        if "tokens" in kwargs:
+            data["max_tokens"] = kwargs.get("tokens")
+
+        if self.streaming:
+            return self._streaming_request(
+                data, text_callback, **kwargs
+            )
+        try:
+            response = requests.post(self.server_url, json=data)
+            resp = response.json()
+            resp = resp.get("choices", [{}])[0].get("text", "")
+            return resp
+        except Exception as e:
+            print(f"Exception: {e} while generating response")
+            return ""
+
+    def _streaming_request(
+        self,
+        data: Dict[str, Any],
+        text_callback: Optional[Callable[[str], None]] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Parse the streaming response from the Nemo MS API
+        """
+        response = requests.post(self.server_url, json=data, stream=True)
+        current_string = ""
+        resp = ""
+
+        # Check the response status
+        if response.status_code == 200:
+            for chunk in response.iter_lines():
+                chunk = chunk.decode("utf-8")
+                if chunk:
+                    # "data: " is prepended to every chunk; remove it before parsing the json
+                    chunk = chunk.removeprefix("data: ")
+                    chunk = self._stream_response_to_generation_chunk(chunk)
+                    # Unlike OpenAI, the MS returns the full response generated so far instead of a single token,
+                    # so find the newly generated chunk and send it for streaming
+                    resp = chunk[len(current_string) :]
+
+                    # Nemo Infer MS sends stop words along with the response
+                    if resp in data.get("stop", self.stop):
+                        continue
+                    if text_callback:
+                        text_callback(resp)
+                    current_string = chunk
+        return resp
\ No newline at end of file
diff --git a/models/Gemma/README.md b/models/Gemma/README.md
index fdd78b46d..1221f90a8 100644
--- a/models/Gemma/README.md
+++ b/models/Gemma/README.md
@@ -6,7 +6,7 @@ For more details, refer the the [Gemma model card](https://ai.google.dev/gemma/d
 ## Customizing Gemma with NeMo Framework
 
-Gemma models are compatiable with [NeMo Framework](https://docs.nvidia.com/nemo-framework/user-guide/latest/index.html). In this repository we have two notebooks that covert different ways of customizing Gemma.
+Gemma models are compatible with [NeMo Framework](https://docs.nvidia.com/nemo-framework/user-guide/latest/index.html). In this repository we have two notebooks that cover different ways of customizing Gemma.
### Paramater Efficient Fine-Tuning with LoRA diff --git a/notebooks/03_llama_index_simple.ipynb b/notebooks/03_llama_index_simple.ipynb index 4657b4d70..276e6021c 100644 --- a/notebooks/03_llama_index_simple.ipynb +++ b/notebooks/03_llama_index_simple.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "9a4cb825-0940-44a7-9f79-c1ca73b37906", + "id": "d7bbdbd7", "metadata": {}, "source": [ "# Notebook 3: Document Question-Answering with LlamaIndex\n", @@ -33,7 +33,7 @@ }, { "cell_type": "markdown", - "id": "d76e8af7-2124-4cb6-8ade-e1c1c42d1701", + "id": "953946f1", "metadata": {}, "source": [ "### Step 1: Integrate TensorRT-LLM to LangChain *and* LlamaIndex\n", @@ -54,7 +54,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "8a80987e-1ddb-4248-b76c-f3ce16745ca3", + "id": "7919fd82", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "markdown", - "id": "bc57b68d-afd5-4a0c-832c-0ad8f3f475d5", + "id": "18600300", "metadata": {}, "source": [ "### Step 2: Create a Prompt Template\n", @@ -84,7 +84,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "682ec812-33be-430f-8bb1-ae3d68690198", + "id": "4fa60e49", "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ }, { "cell_type": "markdown", - "id": "056850b3-70c6-438a-9c35-e017ab611252", + "id": "6063c0e0", "metadata": {}, "source": [ "### Step 3: Load Documents\n", @@ -123,7 +123,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "e9457012-e436-4371-9157-56c1ce4be667", + "id": "4f14d618", "metadata": {}, "outputs": [ { @@ -141,7 +141,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "4f9adbc8-2060-4b16-9252-ac6727b862ee", + "id": "81fe0d1c", "metadata": {}, "outputs": [ { @@ -169,7 +169,7 @@ }, { "cell_type": "markdown", - "id": "f03d6d82-8157-4dbc-97dd-29e3b990f8aa", + "id": "068e61bd", "metadata": {}, "source": [ "### Step 4: Transform Documents with Text Splitting and a Node Parser\n", @@ -188,7 +188,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "fa366250-108e-45a0-88ce-e6f7274da8e1", + "id": "cdcd2b05", "metadata": {}, "outputs": [ { @@ -220,7 +220,7 @@ }, { "cell_type": "markdown", - "id": "cf9e2595-ae85-4c00-b561-d7d1a40933bf", + "id": "2b27c7b7", "metadata": {}, "source": [ "Additionally, we use a LlamaIndex [``PromptHelper``](https://gpt-index.readthedocs.io/en/latest/api_reference/service_context/prompt_helper.html) to help deal with LLM context window token limitations. It calculates available context size to the LLM by taking the initial context token length and subtracting out reserved token space for the prompt template and output. 
It provides a utility for re-packing text chunks from the index to maximally use the context window to minimize requests sent to the LLM.\n", @@ -234,7 +234,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "dc9a6082-34a0-4aa7-964b-7fe3f2015aa9", + "id": "1f429667", "metadata": {}, "outputs": [], "source": [ @@ -250,7 +250,7 @@ }, { "cell_type": "markdown", - "id": "b8dab583-a12d-4fb1-a9eb-3a1b1f04075d", + "id": "ca97830a", "metadata": {}, "source": [ "### Step 5: Generate and Store Embeddings\n", @@ -265,7 +265,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "e9011ba0-f3f6-41f0-8a15-48f264743545", + "id": "0fa4c0fd", "metadata": {}, "outputs": [], "source": [ @@ -288,7 +288,7 @@ }, { "cell_type": "markdown", - "id": "8db99124-e438-406d-880d-557501a461d3", + "id": "22aa461b", "metadata": {}, "source": [ "#### b) Store Embeddings \n", @@ -299,7 +299,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "0e493f9d-589a-4820-902d-f68932bfb0d8", + "id": "4a11b80f", "metadata": {}, "outputs": [], "source": [ @@ -314,7 +314,7 @@ }, { "cell_type": "markdown", - "id": "d339a5b9-0d76-43e7-86d7-0f544f0805a2", + "id": "c14162d7", "metadata": {}, "source": [ "Set the service context globally, to avoid passing it to every llm call/" @@ -323,7 +323,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "ba0efae7-a8ad-4db0-80ea-7edd69bf4719", + "id": "48d000dd", "metadata": {}, "outputs": [], "source": [ @@ -332,9 +332,8 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "79c7923c-d778-4f32-be37-4314063ecd2f", + "id": "7584850f", "metadata": {}, "source": [ "
\n", @@ -346,7 +345,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "1e94e53e-41a9-47d3-a9d3-7c0af4c07f76", + "id": "50b5fbfc", "metadata": {}, "outputs": [], "source": [ @@ -360,9 +359,8 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "5783e23b", + "id": "6af82726", "metadata": {}, "source": [ "Let's load the documents into the vector database index" @@ -371,7 +369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "474b8820", + "id": "b49c4acf", "metadata": {}, "outputs": [], "source": [ @@ -384,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "57e7aa7f-a219-44fe-8757-432daf278f6a", + "id": "126cda61", "metadata": {}, "source": [ "### Step 6: Build the Query Engine and Stream Response\n", @@ -402,7 +400,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f56f37e0-341e-4d7d-b282-f374a16f55b2", + "id": "cd24b951", "metadata": {}, "outputs": [], "source": [ @@ -411,7 +409,7 @@ }, { "cell_type": "markdown", - "id": "a2359014-ef1f-4d0f-bac9-8fdd37a93351", + "id": "90b61943", "metadata": {}, "source": [ "#### b) Stream a Response from the Query Engine\n", @@ -421,7 +419,7 @@ { "cell_type": "code", "execution_count": null, - "id": "38d23754-ea6b-47ce-8b3b-ebd37c0f5693", + "id": "97a018d6", "metadata": {}, "outputs": [], "source": [ @@ -450,7 +448,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/notebooks/07_Option(1)_NVIDIA_AI_endpoint_simple.ipynb b/notebooks/07_Option(1)_NVIDIA_AI_endpoint_simple.ipynb new file mode 100755 index 000000000..7944dd24c --- /dev/null +++ b/notebooks/07_Option(1)_NVIDIA_AI_endpoint_simple.ipynb @@ -0,0 +1,345 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4ff7339a", + "metadata": {}, + "source": [ + "## Notebook 7-Option(1): A simple example of [NVIDIA_AI_Endpoint integrated with langchain](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints) \n", + "\n", + "In this notebook, we are going to use the **mixtral_8x7b as LLM** as well as the **nvolveqa_40k embedding** provided by [NVIDIA_AI_Endpoint](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints) and build a simply RAG example with faiss as vectorstore\n", + "\n", + "### Prerequisite \n", + "In order to successfully run this notebook, you will need the following -\n", + "\n", + "1. Already successfully gone through the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generated an API key.\n", + "2. 
install necesary python dependencies in [requirements.txt](https://github.com/NVIDIA/GenerativeAIExamples/blob/3d29acf677466c5c301370cab5867cb09e04e318/notebooks/requirements.txt) : then upgrade the langchain-core with the below \n", + "pip install langchain-core==0.1.15 \n", + "\n", + "Note: change **faiss-gpu --> faiss-cpu** in pre-requisite 2\n", + "if you do not have access to a GPU.\n" + ] + }, + { + "cell_type": "markdown", + "id": "612375a9", + "metadata": {}, + "source": [ + "### Step 1 - Export the NVIDIA_API_KEY\n", + "You can supply the NVIDIA_API_KEY directly in this notebook when you run the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8d6bbec", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install langchain-core==0.1.15\n", + "!pip install faiss-cpu # replace with faiss-gpu if you are using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d479e614", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.\n", + "## 10K free queries to any endpoint (which is a lot actually).\n", + "\n", + "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", + "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", + "else:\n", + " nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n", + " assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key" + ] + }, + { + "cell_type": "markdown", + "id": "5b4afb52", + "metadata": {}, + "source": [ + "### Step 2 - initialize the LLM \n", + "Here we will use **mixtral_8x7b** " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d3cb734", + "metadata": {}, + "outputs": [], + "source": [ + "# test run and see that you can genreate a respond successfully \n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "llm = ChatNVIDIA(model=\"mixtral_8x7b\", nvidia_api_key=nvapi_key)\n", + "result = llm.invoke(\"Write a ballad about LangChain.\")\n", + "print(result.content)" + ] + }, + { + "cell_type": "markdown", + "id": "93bba1c4", + "metadata": {}, + "source": [ + "### Step 3 - We intiatlize the embedding as well \n", + "We selected **nvolveqa_40k** as the embedding \n" + ] + }, + { + "cell_type": "markdown", + "id": "bf81da21", + "metadata": {}, + "source": [ + "## first we initialize the embedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "850b13c6", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n", + "\n", + "embedder = NVIDIAEmbeddings(model=\"nvolveqa_40k\")\n", + "\n", + "# Alternatively, if you want to specify whether it will use the query or passage type\n", + "# embedder = NVIDIAEmbeddings(model=\"nvolveqa_40k\", model_type=\"passage\")" + ] + }, + { + "cell_type": "markdown", + "id": "d2104106", + "metadata": {}, + "source": [ + "### Step 4 - Obtain some toy text dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31699728", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from tqdm import tqdm\n", + "from pathlib import Path\n", + "\n", + "# Here we read in the text data and prepare them into vectorstore\n", + "ps = os.listdir(\"./toy_data/\")\n", + 
"data = []\n", + "sources = []\n", + "for p in ps:\n", + " if p.endswith('.txt'):\n", + " path2file=\"./toy_data/\"+p\n", + " with open(path2file,encoding=\"utf-8\") as f:\n", + " lines=f.readlines()\n", + " for line in lines:\n", + " if len(line)>=1:\n", + " data.append(line)\n", + " sources.append(path2file)" + ] + }, + { + "cell_type": "markdown", + "id": "710c5a6e", + "metadata": {}, + "source": [ + "### Step 5 - Do some basic cleaning and remove empty lines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a005bd42", + "metadata": {}, + "outputs": [], + "source": [ + "documents=[d for d in data if d is not '\\n']\n", + "len(data), len(documents), data[0]" + ] + }, + { + "cell_type": "markdown", + "id": "7aa261d0", + "metadata": {}, + "source": [ + "### Step 6a (optional) - Speed test: check how fast ( in seconds) processing 1 document vs. a batch of 10 documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc07f199", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "print(\"Single Document Embedding: \")\n", + "s = time.perf_counter()\n", + "q_embedding = embedder.embed_documents([documents[0]])\n", + "elapsed = time.perf_counter() - s\n", + "print(\"\\033[1m\" + f\"Executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")\n", + "print(\"Shape:\", (len(q_embedding),))\n", + "\n", + "print(\"\\nBatch Document Embedding: \")\n", + "s = time.perf_counter()\n", + "d_embeddings = embedder.embed_documents(documents[:10])\n", + "elapsed = time.perf_counter() - s\n", + "print(\"\\033[1m\" + f\"Executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")\n", + "print(\"Shape:\",len(d_embeddings[0]))" + ] + }, + { + "cell_type": "markdown", + "id": "4c5b0aee", + "metadata": {}, + "source": [ + "### Step 6b - Process the documents into faiss vectorstore and save it to disk" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5b8b6a1", + "metadata": {}, + "outputs": [], + "source": [ + "# Here we create a vector store from the documents and save it to disk.\n", + "import faiss\n", + "from operator import itemgetter\n", + "from langchain.vectorstores import FAISS\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "import faiss\n", + "import pickle\n", + "# create my own uuid \n", + "text_splitter = CharacterTextSplitter(chunk_size=400, separator=\" \")\n", + "docs = []\n", + "metadatas = []\n", + "\n", + "for i, d in enumerate(documents):\n", + " splits = text_splitter.split_text(d)\n", + " #print(len(splits))\n", + " docs.extend(splits)\n", + " metadatas.extend([{\"source\": sources[i]}] * len(splits))\n", + "\n", + "store = FAISS.from_texts(docs, embedder , metadatas=metadatas)\n", + "faiss.write_index(store.index, \"./toy_data/nv_embedding.index\")\n", + "store.index = None\n", + "with open(\"./toy_data/nv_embedding.pkl\", \"wb\") as f:\n", + " pickle.dump(store, f)\n", + "# you will only need to do this once, later on we will restore the already saved vectorstore" + ] + }, + { + "cell_type": "markdown", + "id": "3787d615", + "metadata": {}, + "source": [ + "### Step 6c - Read the previously processed & saved Faiss vectore store back" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d889737a", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Load the vectorestore back.\n", + "import faiss\n", + "import pickle\n", + "\n", + "index = faiss.read_index(\"./toy_data/nv_embedding.index\")\n", + "with open(\"./toy_data/nv_embedding.pkl\", \"rb\") as f:\n", + " store = pickle.load(f)\n", + "store.index = index" + ] + }, + { + "cell_type": "markdown", + "id": "a03406c1", + "metadata": {}, + "source": [ + "### Step 7- Wrap the restored vectorsore into a retriever and ask our question " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e032143e", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "retriever = store.as_retriever()\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Answer solely based on the following context:\\n\\n{context}\\n\",\n", + " ),\n", + " (\"user\", \"{question}\"),\n", + " ]\n", + ")\n", + "\n", + "model = ChatNVIDIA(model=\"mixtral_8x7b\")\n", + "\n", + "chain = (\n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | model\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "chain.invoke(\"Tell me about Sweden.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d367584c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/07_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb b/notebooks/07_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb new file mode 100755 index 000000000..e4c4fa81f --- /dev/null +++ b/notebooks/07_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb @@ -0,0 +1,445 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0b7b56a8", + "metadata": {}, + "source": [ + "## Notebook 7-Option(2): minimalistic RAG example with langchain and load a local LLM from HuggingFace \n", + "\n", + "In this notebook, we are going to use the checkpoint from [HuggingFace Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf). \n", + "\n", + "\n", + "
\n", + " \n", + "⚠️ The notebook before this one, `07_Option(1)_NVIDIA_AI_endpoint_simple.ipynb`, contains the same exercise as this notebook but uses NVIDIA AI endpoints' models via API calls instead of loading the models' checkpoints pulled from huggingface model hub, and then load from host to devices (i.e GPUs).\n", + "\n", + "Noted that, since we will load the checkpoints, it will be significantly slower to go through this entire notebook. \n", + "\n", + "If you do decide to go through this notebook, please kindly check the **Prerequisite** section below.\n", + "\n", + "
\n", + "\n", + "\n", + "### Prerequisite \n", + "In order to successfully run this notebook, you will need the following -\n", + "\n", + "1. Already being approved of using the checkpoints via applying for [meta-llama](https://huggingface.co/meta-llama)\n", + "2. At least 2 NVIDIA GPUs, each with at least 32G mem, preferably using Ampere architecture\n", + "3. docker and [nvidia-docker](https://github.com/NVIDIA/nvidia-container-toolkit) installed \n", + "4. Registered [NVIDIA NGC](https://www.nvidia.com/en-us/gpu-cloud/) and can pull and run NGC pytorch containers\n", + "5. install necesary python dependencies : \n", + "Note: if you are using the [Dockerfile.gpu_notebook](https://github.com/NVIDIA/GenerativeAIExamples/blob/main/notebooks/Dockerfile.gpu_notebooks), it should already prepare the environment for you. Otherwise please refer to the Dockerfile for environment building.\n", + "overwrite the langchain-core version via **pip install langchain-core==0.1.15**\n", + "\n", + "\n", + "The notebook will walk you through how to build an end-to-end RAG pipeline using [LangChain](https://python.langchain.com/docs/get_started/introduction), [faiss](https://python.langchain.com/docs/integrations/vectorstores/faiss) as the vectorstore and a custom llm of your choice from huggingface ( more specifically, we will be using [HuggingFace Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) in this notebook, but the process is similar for other llms from huggingface.\n", + "\n", + "\n", + "Generically speaking, the RAG pipeline will involve 2 phases -\n", + "\n", + "The first one is the preprocessing phase illustrated below -" + ] + }, + { + "cell_type": "markdown", + "id": "3cd42886", + "metadata": {}, + "source": [ + "![preprocessing](./imgs/preprocessing.png)" + ] + }, + { + "cell_type": "markdown", + "id": "b9136f2c", + "metadata": {}, + "source": [ + "The second phase is the inference runtime -\n", + "\n", + "![inference_runtime](./imgs/inference_runtime.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a44dada2", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install langchain-core==0.1.15 faiss-gpu" + ] + }, + { + "cell_type": "markdown", + "id": "0e6baa43", + "metadata": {}, + "source": [ + "---\n", + "Let's now go through this notebook step-by-step \n", + "For the first phase, reminder of the flow \n", + "![preprocessing](./imgs/preprocessing.png)\n", + "\n", + "### Step 1 - Load huggingface embedding " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7550842", + "metadata": {}, + "outputs": [], + "source": [ + "### load custom embedding and use it in Faiss \n", + "from langchain.vectorstores import FAISS\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.chains import RetrievalQA\n", + "from langchain.document_loaders import TextLoader\n", + "from langchain.document_loaders import PyPDFLoader\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "\n", + "embedding_model_name = \"sentence-transformers/all-mpnet-base-v2\" # sentence-transformer is the most commonly used embedding\n", + "emd_model_kwargs = {\"device\": \"cuda\"}\n", + "hf_embedding = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=emd_model_kwargs)\n" + ] + }, + { + "cell_type": "markdown", + "id": "6c44b238", + "metadata": {}, + "source": [ + "### Step 2 - Prepare the toy text dataset \n", + "We will 
prepare the XXX.txt files ( there should be Sweden.txt and and using the above embedding to parse chuck of text and store them into faiss-gpu vectorstore" + ] + }, + { + "cell_type": "markdown", + "id": "1d6837e7", + "metadata": {}, + "source": [ + "Let's have a look at text datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "873fbf3b", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "head -1 ./toy_data/Sweden.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "435e057e", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "head -3 ./toy_data/Titanic_film.txt" + ] + }, + { + "cell_type": "markdown", + "id": "4de1685c", + "metadata": {}, + "source": [ + "### Step 3 - Process the document into faiss vectorstore and save to disk" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e17dee5", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from tqdm import tqdm\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from pathlib import Path\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "import faiss\n", + "from langchain.vectorstores import FAISS,utils\n", + "import pickle\n", + "\n", + "# Here we read in the text data and prepare them into vectorstore\n", + "ps = list(Path(\"./toy_data/\").glob('**/*.txt'))\n", + "print(ps)\n", + "data = []\n", + "sources = []\n", + "for p in ps:\n", + " with open(p,encoding=\"utf-8\") as f:\n", + " data.append(f.read())\n", + " sources.append(p)\n", + "\n", + "# We do this due to the context limits of the LLMs.\n", + "# Here we split the documents, as needed, into smaller chunks.\n", + "# We do this due to the context limits of the LLMs.\n", + "\n", + "text_splitter = CharacterTextSplitter(chunk_size=200, separator=\"\\n\")\n", + "docs = []\n", + "metadatas = []\n", + "for i, d in enumerate(data):\n", + " splits = text_splitter.split_text(d)\n", + " docs.extend(splits)\n", + " metadatas.extend([{\"source\": sources[i]}] * len(splits))\n", + "\n", + "# Here we create a vector store from the documents and save it to disk.\n", + "store = FAISS.from_texts(docs, hf_embedding, metadatas=metadatas)\n", + "faiss.write_index(store.index, \"./toy_data/hf_embedding_docs.index\")\n", + "store.index = None\n", + "with open(\"./toy_data/hf_embeddingfaiss_store.pkl\", \"wb\") as f:\n", + " pickle.dump(store, f)\n", + "# you will only need to do this once, later on we will restore the already saved vectorstore\n" + ] + }, + { + "cell_type": "markdown", + "id": "61b91fa3", + "metadata": {}, + "source": [ + "### Step 4 - Reload the already saved vectorstore and prepare for retrival" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a635211d", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the LangChain.\n", + "from pathlib import Path\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "import faiss\n", + "from langchain.vectorstores import FAISS\n", + "import pickle\n", + "\n", + "index = faiss.read_index(\"./toy_data/hf_embedding_docs.index\")\n", + "\n", + "with open(\"./toy_data/hf_embeddingfaiss_store.pkl\", \"rb\") as f:\n", + " store = pickle.load(f)\n", + "\n", + "store.index = index" + ] + }, + { + "cell_type": "markdown", + "id": "fac189ce", + "metadata": {}, + "source": [ + "\n", + "### Step 5 - Prepare the loaded vectorstore 
into a retriver " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7d66c59", + "metadata": {}, + "outputs": [], + "source": [ + "retriever = store.as_retriever(search_type='similarity', search_kwargs={\"k\": 3}) # k is a hyperparameter, usally by default set to 3 " + ] + }, + { + "cell_type": "markdown", + "id": "5e0b7a9c", + "metadata": {}, + "source": [ + "Now we are finally done with the preprocessing step, next we will proceed to phase 2\n", + "\n", + "--- \n", + "Recall phase 2 involve a runtime which we could query the already loaded faiss vectorstore. \n", + "\n", + "![inference](./imgs/inference_runtime.png)\n" + ] + }, + { + "cell_type": "markdown", + "id": "d1c56e53", + "metadata": {}, + "source": [ + "### Step 6 - Load the [HuggingFace Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) to your GPUs\n", + "\n", + "Note: Scroll down and make sure you supply the **hf_token in code block below [FILL_IN] your huggingface token**\n", + ", for how to generate the token from huggingface, please following instruction from [this link](https://huggingface.co/docs/transformers.js/guides/private)\n", + "\n", + "Note: The execution of cell below will take up sometime, please be patient until the checkpoint is fully loaded. Alternatively, turn to previous notebook 07_Option(1)_NVIDIA_AI_endpoint_simply.ipynb if you wish to use already deployed models as API calls instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62f3cc61", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch\n", + "import transformers\n", + "from langchain import HuggingFacePipeline\n", + "from transformers import (\n", + " AutoConfig,\n", + " AutoModel,\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " GenerationConfig,\n", + " LlamaForCausalLM,\n", + " LlamaTokenizer,\n", + " pipeline,\n", + ")\n", + "\n", + "def load_model(model_name_or_path, device, num_gpus, hf_auth_token=None, debug=False):\n", + " \"\"\"Load an HF locally saved checkpoint.\"\"\"\n", + " if device == \"cpu\":\n", + " kwargs = {}\n", + " elif device == \"cuda\":\n", + " kwargs = {\"torch_dtype\": torch.float16}\n", + " if num_gpus == \"auto\":\n", + " kwargs[\"device_map\"] = \"auto\"\n", + " else:\n", + " num_gpus = int(num_gpus)\n", + " if num_gpus != 1:\n", + " kwargs.update(\n", + " {\n", + " \"device_map\": \"auto\",\n", + " \"max_memory\": {i: \"20GiB\" for i in range(num_gpus)},\n", + " }\n", + " )\n", + " elif device == \"mps\":\n", + " kwargs = {\"torch_dtype\": torch.float16}\n", + " # Avoid bugs in mps backend by not using in-place operations.\n", + " print(\"mps not supported\")\n", + " else:\n", + " raise ValueError(f\"Invalid device: {device}\")\n", + "\n", + " if hf_auth_token is None:\n", + " tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " model_name_or_path, low_cpu_mem_usage=True, **kwargs\n", + " )\n", + " else:\n", + " tokenizer = AutoTokenizer.from_pretrained(\n", + " model_name_or_path, use_auth_token=hf_auth_token, use_fast=False\n", + " )\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " model_name_or_path,\n", + " low_cpu_mem_usage=True,\n", + " use_auth_token=hf_auth_token,\n", + " **kwargs,\n", + " )\n", + "\n", + " if device == \"cuda\" and num_gpus == 1:\n", + " model.to(device)\n", + "\n", + " if debug:\n", + " print(model)\n", + "\n", + " return model, tokenizer\n", + "\n", + 
"model_name=\"meta-llama/Llama-2-13b-chat-hf\"\n", + "device = \"cuda\"\n", + "num_gpus = 2 ## minimal requirement is that you have 2x NVIDIA GPUs\n", + "\n", + "## Remember to supply your own huggingface access token\n", + "hf_token= \"[FILL_IN]\"\n", + "model, tokenizer = load_model(model_name, device, num_gpus,hf_auth_token=hf_token, debug=False)\n", + "\n", + "pipe = pipeline(\n", + " \"text-generation\",\n", + " model=model,\n", + " tokenizer=tokenizer,\n", + " max_new_tokens=256,\n", + " temperature=0.1,\n", + " do_sample=True,\n", + ")\n", + "hf_llm = HuggingFacePipeline(pipeline=pipe)" + ] + }, + { + "cell_type": "markdown", + "id": "f7dd8e39", + "metadata": {}, + "source": [ + "### Step 7 - Supply the hf_llm as well as the retriver we prepared above into langchain's RetrievalQA chain\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0d3515a", + "metadata": {}, + "outputs": [], + "source": [ + "# create the using RetrievalQA \n", + "from langchain.chains import RetrievalQA\n", + "\n", + "qa_chain = RetrievalQA.from_chain_type(llm=hf_llm, # supply meta llama2 model\n", + " chain_type=\"stuff\", \n", + " retriever=retriever, # using our own retriever\n", + " return_source_documents=True) " + ] + }, + { + "cell_type": "markdown", + "id": "7f6f8f45", + "metadata": {}, + "source": [ + "### Step 8 - We are now ready to ask questions " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3407ff3", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"When is the film Titanic being made ?\"\n", + "#query =\"Who is the director for the film?\"\n", + "llm_response = qa_chain(query)\n", + "print(\"llm response after retrieve from KB, the answer is :\\n\")\n", + "print(llm_response['result'])\n", + "print(\"---\"*10)\n", + "print(\"source paragraph >> \")\n", + "llm_response['source_documents'][0].page_content\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fb36ae1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/08_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb b/notebooks/08_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb new file mode 100755 index 000000000..a394f5ee0 --- /dev/null +++ b/notebooks/08_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0c6f347b", + "metadata": {}, + "source": [ + "## Notebook 8-Option(1): Plugin NVIDIA AI endpoint's [mixtral_8x7b](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/mixtral-8x7b) into LlamaIndex and Langchain\n", + "\n", + "This notebook demonstrates how to plug in a NVIDIA AI Endpoint [mixtral_8x7b](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/mixtral-8x7b) and [embedding nvolveqa_40k](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup), bind these into [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/) with these customizations.\n", + "\n", + "\n", + "
\n", + " \n", + "⚠️ There are continous development and retrieval techniques supported in LlamaIndex and this notebook just shows to quikcly replace components such as llm and embedding to a user-choice, read more [documentation on llama-index](https://docs.llamaindex.ai/en/stable/) for the latest information. \n", + "
\n", + "\n", + "### Prerequisite \n", + "In order to successfully run this notebook, you will need the following -\n", + "\n", + "1. Already successfully gone through the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generated an API key.\n", + "\n", + "2. Please verify you have successfully pip install all python packages in [requirements.txt](https://github.com/NVIDIA/GenerativeAIExamples/blob/3d29acf677466c5c301370cab5867cb09e04e318/notebooks/requirements.txt)\n", + "\n", + "In this notebook, we will cover the following custom plug-in components -\n", + "\n", + " - LLM using NVIDIA AI Endpoint mixtral_8x7b\n", + " \n", + " - A NVIDIA AI endpoint embedding nvolveqa_40k\n", + " \n", + "Note: As one can see, since we are using NVIDIA AI endpoints as an API, there is no further requirement in the prerequisites about GPUs as compute hardware\n" + ] + }, + { + "cell_type": "markdown", + "id": "eab81075", + "metadata": {}, + "source": [ + "---\n", + "### Step 1 - Load NVIDIA AI Endpoint [mixtral_8x7b](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/mixtral-8x7b)\n", + "\n", + "Note: check the prerequisite if you have not yet obtain a valid API key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863eb0a6", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.\n", + "## 10K free queries to any endpoint (which is a lot actually).\n", + "\n", + "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", + "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", + "else:\n", + " nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n", + " assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... 
is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key" + ] + }, + { + "cell_type": "markdown", + "id": "18263fc1", + "metadata": {}, + "source": [ + "run a test and see the model generating output response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4465b00f", + "metadata": {}, + "outputs": [], + "source": [ + "# test run and see that you can genreate a respond successfully \n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "llm = ChatNVIDIA(model=\"mixtral_8x7b\", nvidia_api_key=nvapi_key)\n", + "result = llm.invoke(\"Write a ballad about LangChain.\")\n", + "print(result.content)" + ] + }, + { + "cell_type": "markdown", + "id": "d54a1400", + "metadata": {}, + "source": [ + "### Step 2 - Load the chosen NVIDIA Endpoint Embedding into llama-index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a31d61c", + "metadata": {}, + "outputs": [], + "source": [ + "# Create and dl embeddings instance wrapping huggingface embedding into langchain embedding\n", + "# Bring in embeddings wrapper\n", + "from llama_index.embeddings import LangchainEmbedding\n", + "\n", + "from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n", + "nv_embedding = NVIDIAEmbeddings(model=\"nvolveqa_40k\", model_type=\"query\")\n", + "li_embedding=LangchainEmbedding(nv_embedding)\n", + "# Alternatively, if you want to specify whether it will use the query or passage type\n", + "# embedder = NVIDIAEmbeddings(model=\"nvolveqa_40k\", model_type=\"passage\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f37ec357", + "metadata": {}, + "source": [ + "Note: if you encounter typing_extension error, simply reinstall via :pip install typing_extensions==4.7.1 --force-reinstall" + ] + }, + { + "cell_type": "markdown", + "id": "7f224941", + "metadata": {}, + "source": [ + "### Step 3 - Wrap the NVIDIA embedding endpoint and the NVIDIA mixtral_8x7b endpoints into llama-index's ServiceContext" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4341187b", + "metadata": {}, + "outputs": [], + "source": [ + "# Bring in stuff to change service context\n", + "from llama_index import set_global_service_context\n", + "from llama_index import ServiceContext\n", + "\n", + "# Create new service context instance\n", + "service_context = ServiceContext.from_defaults(\n", + " chunk_size=1024,\n", + " llm=llm,\n", + " embed_model=li_embedding\n", + ")\n", + "# And set the service context\n", + "set_global_service_context(service_context)\n" + ] + }, + { + "cell_type": "markdown", + "id": "b5faadac", + "metadata": {}, + "source": [ + "### Step 4a - Load the text data using llama-index's SimpleDirectoryReader and we will be using the built-in [VectorStoreIndex](https://docs.llamaindex.ai/en/latest/community/integrations/vector_stores.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c07bfd3", + "metadata": {}, + "outputs": [], + "source": [ + "#create query engine with cross encoder reranker\n", + "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", + "import torch\n", + "\n", + "documents = SimpleDirectoryReader(\"./toy_data\").load_data()\n", + "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n" + ] + }, + { + "cell_type": "markdown", + "id": "ef737b2b", + "metadata": {}, + "source": [ + "### Step 4b - This will serve as the query engine for us to ask questions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"a6461099", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup index query engine using LLM \n", + "query_engine = index.as_query_engine()\n", + "# Test out a query in natural\n", + "response = query_engine.query(\"who is the director of the movie Titanic?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c66a54c", + "metadata": {}, + "outputs": [], + "source": [ + "response.metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c281f1f3", + "metadata": {}, + "outputs": [], + "source": [ + "response.response" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/08_Option(2)_llama_index_with_HF_local_LLM.ipynb b/notebooks/08_Option(2)_llama_index_with_HF_local_LLM.ipynb new file mode 100755 index 000000000..113e1bfd4 --- /dev/null +++ b/notebooks/08_Option(2)_llama_index_with_HF_local_LLM.ipynb @@ -0,0 +1,391 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e20e8642", + "metadata": {}, + "source": [ + "## Notebook 8-Option(2): Custom plug in local HF checkpoint into LlamaIndex and Langchain\n", + "\n", + "This notebook demonstrates how to plug in a local llm from [HuggingFace Hub Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) and [all-MiniLM-L6-v2 embedding from Huggingface](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2), bind these to into [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/) with these customizations.\n", + "\n", + "The custom plug-ins shown in this notebook can be replaced, for example, you can swap out the [HuggingFace Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) with [HuggingFace checkpoint from Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1).\n", + "\n", + "\n", + "
\n", + " \n", + "⚠️ The notebook before this one, `08_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb`, contains the same exercise as this notebook but uses NVIDIA AI endpoints' models via API calls instead of loading the models' checkpoints pulled from huggingface model hub, and then load from host to devices (i.e GPUs).\n", + "\n", + "Noted that, since we will load the checkpoints, it will be significantly slower to go through this entire notebook. \n", + "\n", + "If you do decide to go through this notebook, please kindly check the **Prerequisite** section below.\n", + "\n", + "There are continous development and retrieval techniques supported in LlamaIndex and this notebook just shows how to quickly replace components such as llm and embedding per user's choice, read more [documentation on llama-index](https://docs.llamaindex.ai/en/stable/) for the latest nformation. \n", + "\n", + "
\n", + "\n", + "### Prerequisite \n", + "In order to successfully run this notebook, you will need the following -\n", + "\n", + "1. Already being approved of using the checkpoints via applying for [meta-llama](https://huggingface.co/meta-llama)\n", + "2. At least 2 NVIDIA GPUs, each with at least 32G mem, preferably using Ampere architecture\n", + "3. docker and [nvidia-docker](https://github.com/NVIDIA/nvidia-container-toolkit) installed \n", + "4. Registered [NVIDIA NGC](https://www.nvidia.com/en-us/gpu-cloud/) and can pull and run NGC pytorch containers\n", + "5. install necesary python dependencies : \n", + "Note: if you are using the [Dockerfile.gpu_notebook](https://github.com/NVIDIA/GenerativeAIExamples/blob/main/notebooks/Dockerfile.gpu_notebooks), it should already prepare the environment for you. Otherwise please refer to the Dockerfile for environment building.\n", + "\n", + "In this notebook, we will cover the following custom plug-in components -\n", + "\n", + " - LLM locally load from [HuggingFace Hub Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) and warp this into llama-index \n", + " \n", + " - A [HuggingFace embedding all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) \n", + " \n" + ] + }, + { + "cell_type": "markdown", + "id": "e0217b34", + "metadata": {}, + "source": [ + "### Step 1 - Load [HuggingFace Hub Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) \n", + "\n", + "\n", + "Note: Scroll down and make sure you supply the **hf_token in code block below, replace [FILL_IN] with your huggingface token** \n", + ", for how to generate the token from huggingface, please following instruction from [this link](https://huggingface.co/docs/transformers.js/guides/private)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee534b0a", + "metadata": {}, + "outputs": [], + "source": [ + "## uncomment the below if you have not yet install the python dependencies\n", + "#!pip install accelerate transformers==4.33.1 --upgrade" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb4db6b5", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "\n", + "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", + "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n", + "import os\n", + "from IPython.display import Markdown, display\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer\n", + "import torch\n", + "\n", + "def load_hf_model(model_name_or_path, device, num_gpus,hf_auth_token, debug=False):\n", + " \"\"\"Load an HF locally saved checkpoint.\"\"\"\n", + " if device == \"cpu\":\n", + " kwargs = {}\n", + " elif device == \"cuda\":\n", + " kwargs = {\"torch_dtype\": torch.float16}\n", + " if num_gpus == \"auto\":\n", + " kwargs[\"device_map\"] = \"auto\"\n", + " else:\n", + " num_gpus = int(num_gpus)\n", + " if num_gpus != 1:\n", + " kwargs.update(\n", + " {\n", + " \"device_map\": \"auto\",\n", + " \"max_memory\": {i: \"13GiB\" for i in range(num_gpus)},\n", + " }\n", + " )\n", + " elif device == \"mps\":\n", + " kwargs = {\"torch_dtype\": torch.float16}\n", + " # Avoid bugs in mps backend by not using in-place operations.\n", + " print(\"mps not supported\")\n", + " else:\n", + " raise ValueError(f\"Invalid device: {device}\")\n", + "\n", + " if hf_auth_token is None:\n", + " tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)\n", + " 
model = AutoModelForCausalLM.from_pretrained(\n", + " model_name_or_path, low_cpu_mem_usage=True, **kwargs\n", + " )\n", + " else:\n", + " tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=hf_auth_token, use_fast=False)\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " model_name_or_path, low_cpu_mem_usage=True,use_auth_token=hf_auth_token, **kwargs\n", + " )\n", + "\n", + " if device == \"cuda\" and num_gpus == 1:\n", + " model.to(device)\n", + "\n", + " if debug:\n", + " print(model)\n", + "\n", + " return model, tokenizer\n", + "\n", + "\n", + "\n", + "# Define variable to hold llama2 weights naming \n", + "model_name_or_path = \"meta-llama/Llama-2-13b-chat-hf\"\n", + "# Set auth token variable from hugging face \n", + "# Create tokenizer\n", + "hf_token= \"[FILL_IN]\"\n", + "device = \"cuda\"\n", + "num_gpus = 2\n", + "\n", + "model, tokenizer = load_hf_model(model_name_or_path, device, num_gpus,hf_auth_token=hf_token, debug=False)\n", + "# Setup a prompt \n", + "prompt = \"### User:What is the fastest car in \\\n", + " the world and how much does it cost? \\\n", + " ### Assistant:\"\n", + "# Pass the prompt to the tokenizer\n", + "inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", + "# Setup the text streamer \n", + "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)" + ] + }, + { + "cell_type": "markdown", + "id": "cbbc4ece", + "metadata": {}, + "source": [ + "run a test and see the model generating output response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "183058b6", + "metadata": {}, + "outputs": [], + "source": [ + "output = model.generate(**inputs, streamer=streamer, use_cache=True, max_new_tokens=100)\n", + "# Covert the output tokens back to text \n", + "output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n", + "output_text" + ] + }, + { + "cell_type": "markdown", + "id": "71f6e263", + "metadata": {}, + "source": [ + "### Step 2 - Construct prompt template" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33126665", + "metadata": {}, + "outputs": [], + "source": [ + "# Import the prompt wrapper...but for llama index\n", + "from llama_index.prompts.prompts import SimpleInputPrompt\n", + "# Create a system prompt \n", + "system_prompt = \"\"\"<>\n", + "You are a helpful, respectful and honest assistant. Always answer as \n", + "helpfully as possible, while being safe. Your answers should not include\n", + "any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.\n", + "Please ensure that your responses are socially unbiased and positive in nature.\n", + "\n", + "If a question does not make any sense, or is not factually coherent, explain \n", + "why instead of answering something not correct. 
If you don't know the answer \n", + "to a question, please don't share false information.\n", + "\n", + "Your goal is to provide answers relating to the financial performance of \n", + "the company.<</SYS>>[INST] \n", + "\"\"\"\n", + "# Throw together the query wrapper\n", + "query_wrapper_prompt = SimpleInputPrompt(\"{query_str} [/INST]\")\n", + "## do a test query\n", + "query_str='What can you help me with?'\n", + "query_wrapper_prompt.format(query_str=query_str)\n" + ] + }, + { + "cell_type": "markdown", + "id": "49edb0ed", + "metadata": {}, + "source": [ + "### Step 3 - Load the chosen huggingface Embedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2262ad3", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the embeddings instance, wrapping the huggingface embedding into a llama-index LangchainEmbedding\n", + "# Bring in embeddings wrapper\n", + "from llama_index.embeddings import LangchainEmbedding\n", + "# Bring in HF embeddings - need these to represent document chunks\n", + "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", + "embeddings=LangchainEmbedding(\n", + "    HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "dde3b0cf", + "metadata": {}, + "source": [ + "### Step 4 - Prepare the locally loaded huggingface llm into llamaindex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78d8eaa9", + "metadata": {}, + "outputs": [], + "source": [ + "# Import the llama index HF Wrapper\n", + "from llama_index.llms import HuggingFaceLLM\n", + "# Create a HF LLM using the llama index wrapper \n", + "llm = HuggingFaceLLM(context_window=4096,\n", + "                    max_new_tokens=256,\n", + "                    system_prompt=system_prompt,\n", + "                    query_wrapper_prompt=query_wrapper_prompt,\n", + "                    model=model,\n", + "                    tokenizer=tokenizer)\n" + ] + }, + { + "cell_type": "markdown", + "id": "bc30cabc", + "metadata": {}, + "source": [ + "### Step 5 - Wrap the custom embedding and the locally loaded huggingface llm into llama-index's ServiceContext" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71e10c05", + "metadata": {}, + "outputs": [], + "source": [ + "# Bring in stuff to change service context\n", + "from llama_index import set_global_service_context\n", + "from llama_index import ServiceContext" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cbd7b07", + "metadata": {}, + "outputs": [], + "source": [ + "# Create new service context instance\n", + "service_context = ServiceContext.from_defaults(\n", + "    chunk_size=1024,\n", + "    llm=llm,\n", + "    embed_model=embeddings\n", + ")\n", + "# And set the service context\n", + "set_global_service_context(service_context)\n" + ] + }, + { + "cell_type": "markdown", + "id": "5cd94180", + "metadata": {}, + "source": [ + "### Step 6a - Load the text data using llama-index's SimpleDirectoryReader and we will be using the built-in [VectorStoreIndex](https://docs.llamaindex.ai/en/latest/community/integrations/vector_stores.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0106c20", + "metadata": {}, + "outputs": [], + "source": [ + "# load the toy_data documents and build the VectorStoreIndex\n", + "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", + "import torch\n", + "\n", + "documents = SimpleDirectoryReader(\"./toy_data\").load_data()\n", + "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n" + ] + }, + { + 
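"cell_type": "markdown", + "id": "added-retrieval-check-md", + "metadata": {}, + "source": [ + "(Optional, added example) Before wiring the index into a query engine in Step 6b, you can sanity-check retrieval directly. The sketch below only assumes the `index` built in Step 6a; the value of `similarity_top_k` and the sample question are arbitrary choices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "added-retrieval-check", + "metadata": {}, + "outputs": [], + "source": [ + "# Optional sanity check (added example): retrieve the top chunks for a sample question\n", + "# directly from the index, before building the query engine in Step 6b.\n", + "retriever = index.as_retriever(similarity_top_k=2)  # top-k here is an arbitrary choice\n", + "nodes = retriever.retrieve(\"what is transformer engine?\")\n", + "for node_with_score in nodes:\n", + "    # each result is a NodeWithScore: print the similarity score and a snippet of the chunk\n", + "    print(node_with_score.score, node_with_score.node.get_content()[:200])" + ] + }, + { + 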
"cell_type": "markdown", + "id": "1b0f0c6a", + "metadata": {}, + "source": [ + "### Step 6b - This will serve as the query engine for us to ask questions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37fbc82e", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup index query engine using LLM \n", + "query_engine = index.as_query_engine()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b357fa1", + "metadata": {}, + "outputs": [], + "source": [ + "# Test out a query in natural\n", + "response = query_engine.query(\"what is transformer engine?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43e0f653", + "metadata": {}, + "outputs": [], + "source": [ + "response.metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b13e6753", + "metadata": {}, + "outputs": [], + "source": [ + "response.response" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb b/notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb new file mode 100644 index 000000000..bb834742b --- /dev/null +++ b/notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb @@ -0,0 +1,496 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "00f2afe4", + "metadata": {}, + "source": [ + "## Notebook 09: [Langchain agent](https://python.langchain.com/docs/modules/agents/tools/) with tools plug in multiple models from [NVIDIA AI Endpoint](https://catalog.ngc.nvidia.com/ai-foundation-models)\n", + "\n", + "\n", + "### Prerequisite \n", + "In order to successfully run this notebook, you will need the following -\n", + "\n", + "1. Already successfully gone through the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generated an API key.\n", + "\n", + "2. install necesary python dependencies in [requirements.txt](https://github.com/NVIDIA/GenerativeAIExamples/blob/main/notebooks/requirements.txt) : then install additional python packages : \n", + "\n", + " pip install gradio matplotlib scikit-image\n", + "\n", + "\n", + "In this notebook, we will cover the following custom plug-in components -\n", + "\n", + " - LLM using NVIDIA AI Endpoint mixtral_8x7b\n", + " \n", + " - A NVIDIA AI endpoint **Deplot** as one of the tool\n", + "\n", + " - A NVIDIA AI endpoint **NeVa** as one of the tool\n", + " \n", + " - Gradio as the simply User Interface where we will upload a few images\n", + "\n", + "At the end of the day, as below illustrated, we would like to have a UI which allow user to upload image of their choice and have the agent choose tools to do visual reasoning. 
\n", + "\n", + "![interactive UI](./imgs/visual_reasoning.png) \n", + "Note: As one can see, since we are using NVIDIA AI endpoints as an API, there is no further requirement in the prerequisites about GPUs as compute hardware\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "565acc79", + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment the below to install additional python packages.\n", + "#!pip install unstructured\n", + "#!pip install matplotlib scikit-image\n", + "#!pip install gradio" + ] + }, + { + "cell_type": "markdown", + "id": "fa62d8fe", + "metadata": {}, + "source": [ + "### Step 1 - Export the NVIDIA_API_KEY\n", + "You can supply the NVIDIA_API_KEY directly in this notebook when you run the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5578093", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.\n", + "## 10K free queries to any endpoint (which is a lot actually).\n", + "\n", + "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", + "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", + "else:\n", + " nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n", + " assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key\n", + "global nvapi_key" + ] + }, + { + "cell_type": "markdown", + "id": "6b6a6dba", + "metadata": {}, + "source": [ + "### Step 2 - wrap the NeVa API call into a function and verify by supplying an image to get a respond" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30d055c0", + "metadata": {}, + "outputs": [], + "source": [ + "import openai, httpx, sys\n", + "\n", + "import base64, io\n", + "from PIL import Image\n", + "\n", + "\n", + "def img2base64_string(img_path):\n", + " image = Image.open(img_path)\n", + " if image.width > 800 or image.height > 800:\n", + " image.thumbnail((800, 800))\n", + " buffered = io.BytesIO()\n", + " image.convert(\"RGB\").save(buffered, format=\"JPEG\", quality=85)\n", + " image_base64 = base64.b64encode(buffered.getvalue()).decode()\n", + " return image_base64\n", + "\n", + "def nv_api_response(prompt, img_path):\n", + " base = \"https://api.nvcf.nvidia.com\"\n", + " url = \"/v2/nvcf/pexec/functions/8bf70738-59b9-4e5f-bc87-7ab4203be7a0\"\n", + "\n", + " # Get your key at: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/neva-22b/api\n", + " # click on the \"Generate Key\" button \n", + "\n", + " def hook(request):\n", + " request.url = httpx.URL(request.url, path=url)\n", + " request.headers['Accept'] = 'text/event-stream'\n", + "\n", + " client = openai.OpenAI(\n", + " base_url=base,\n", + " api_key=nvapi_key,\n", + " http_client=httpx.Client(event_hooks={'request': [hook]})\n", + " )\n", + " base64_str=img2base64_string(img_path)\n", + "\n", + " result = client.chat.completions.create(\n", + " model=\"neva-22b\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": [\n", + " {\"type\": \"text\", \"text\": prompt},\n", + " {\"type\": \"image_url\", \"image_url\": f\"data:image/png;base64,{base64_str}\"} # or image/jpeg\n", + " ]\n", + " },\n", + "\n", + " # {\"role\": \"assistant\", \"labels\": {'creativity': 0}} # Uncomment to get less 
verbose response\n", + " ],\n", + " max_tokens=512, # Minimum 32, maximum 512. This is a bug. \n", + " temperature=0.2, \n", + " top_p=0.7, \n", + " stream=True # Use streaming mode for responses longer than 32 tokens.\n", + " )\n", + "\n", + " for chunk in result:\n", + " print(chunk.choices[0].delta.content, end=\"\")\n", + " sys.stdout.flush()\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "29f6d798", + "metadata": {}, + "source": [ + "fetch a test image of a pair of white sneakers and verify the function works" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18260b24", + "metadata": {}, + "outputs": [], + "source": [ + "!wget \"https://docs.google.com/uc?export=download&id=12ZpBBFkYu-jzz1iz356U5kMikn4uN9ww\" -O ./toy_data/jordan.png" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f74e7960", + "metadata": {}, + "outputs": [], + "source": [ + "img_path=\"./toy_data/jordan.png\"\n", + "prompt=\"describe the image\"\n", + "out=nv_api_response(prompt,img_path)" + ] + }, + { + "cell_type": "markdown", + "id": "36891baa", + "metadata": {}, + "source": [ + "### Step 3 - we are gonna use mixtral_8x7b model as our main LLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44c47713", + "metadata": {}, + "outputs": [], + "source": [ + "# test run and see that you can genreate a respond successfully \n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "llm = ChatNVIDIA(model=\"mixtral_8x7b\", nvidia_api_key=nvapi_key)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58c93087", + "metadata": {}, + "outputs": [], + "source": [ + "#Set up Prerequisites for Image Captioning App User Interface\n", + "import os\n", + "import io\n", + "import IPython.display\n", + "from PIL import Image\n", + "import base64\n", + "import requests\n", + "import gradio as gr\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "b3cbd029", + "metadata": {}, + "source": [ + "### Step 4- wrap Deplot and Neva as tools for later usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f084601", + "metadata": {}, + "outputs": [], + "source": [ + "#Set up Prerequisites for Image Captioning App User Interface\n", + "import os\n", + "import io\n", + "import IPython.display\n", + "from PIL import Image\n", + "import base64\n", + "import requests\n", + "import gradio as gr\n", + "\n", + "from langchain.tools import BaseTool\n", + "from transformers import BlipProcessor, BlipForConditionalGeneration, DetrImageProcessor, DetrForObjectDetection\n", + "from PIL import Image\n", + "import torch\n", + "#\n", + "import os\n", + "from tempfile import NamedTemporaryFile\n", + "from langchain.agents import initialize_agent\n", + "from langchain.chains.conversation.memory import ConversationBufferWindowMemory\n", + "\n", + "class ImageCaptionTool(BaseTool):\n", + " name = \"Image captioner from NeVa\"\n", + " description = \"Use this tool when given the path to an image that you would like to be described. 
\" \\\n", + " \"It will return a simple caption describing the image.\"\n", + " \n", + " # generate api key via https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/neva-22b/api\n", + " def img2base64_string(self,img_path):\n", + " print(img_path)\n", + " image = Image.open(img_path)\n", + " if image.width > 800 or image.height > 800:\n", + " image.thumbnail((800, 800))\n", + " buffered = io.BytesIO()\n", + " image.convert(\"RGB\").save(buffered, format=\"JPEG\", quality=85)\n", + " image_base64 = base64.b64encode(buffered.getvalue()).decode()\n", + " return image_base64\n", + "\n", + " def _run(self, img_path):\n", + " invoke_url = \"https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/8bf70738-59b9-4e5f-bc87-7ab4203be7a0\"\n", + " fetch_url_format = \"https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/\"\n", + "\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {nvapi_key}\",\n", + " \"Accept\": \"application/json\",\n", + " }\n", + " base64_str = self.img2base64_string(img_path)\n", + " prompt = \"\"\"\\\n", + " can you summarize what is in the image\\\n", + " and return the answer \\\n", + " \"\"\"\n", + " payload = {\n", + " \"messages\":[\n", + " {\"role\": \"user\", \"content\": [\n", + " {\"type\": \"text\", \"text\": prompt},\n", + " {\"type\": \"image_url\", \"image_url\": f\"data:image/png;base64,{base64_str}\"} # or image/jpeg\n", + " ]\n", + " },\n", + " {\n", + " \"labels\": {\n", + " \"creativity\": 6,\n", + " \"helpfulness\": 6,\n", + " \"humor\": 0,\n", + " \"quality\": 6\n", + " },\n", + " \"role\": \"assistant\"\n", + " } ],\n", + " \"temperature\": 0.2,\n", + " \"top_p\": 0.7,\n", + " \"max_tokens\": 512,\n", + " \"stream\": False\n", + " }\n", + "\n", + " # re-use connections\n", + " session = requests.Session()\n", + "\n", + " response = session.post(invoke_url, headers=headers, json=payload)\n", + " print(response)\n", + " while response.status_code == 202:\n", + " request_id = response.headers.get(\"NVCF-REQID\")\n", + " fetch_url = fetch_url_format + request_id\n", + " response = session.get(fetch_url, headers=headers)\n", + "\n", + " response.raise_for_status()\n", + " response_body = response.json()\n", + " print(response_body)\n", + " return response_body['choices'][0]['message']['content']\n", + "\n", + "\n", + " def _arun(self, query: str):\n", + " raise NotImplementedError(\"This tool does not support async\")\n", + "\n", + "\n", + "class TabularPlotTool(BaseTool):\n", + " name = \"Tabular Plot reasoning tool\"\n", + " description = \"Use this tool when given the path to an image that contain bar, pie chart objects. 
\" \\\n", + " \"It will extract and return the tabular data \" \n", + " \n", + " def img2base64_string(self, img_path):\n", + " print(img_path)\n", + " image = Image.open(img_path)\n", + " if image.width > 800 or image.height > 800:\n", + " image.thumbnail((800, 800))\n", + " buffered = io.BytesIO()\n", + " image.convert(\"RGB\").save(buffered, format=\"JPEG\", quality=85)\n", + " image_base64 = base64.b64encode(buffered.getvalue()).decode()\n", + " return image_base64\n", + " \n", + " def _run(self, img_path): \n", + " # using DePlot from NVIDIA AI Endpoint playground, generate your key via :https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/deplot/api\n", + " invoke_url = \"https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/3bc390c7-eeec-40f7-a64d-0c6a719985f7\"\n", + " fetch_url_format = \"https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/\"\n", + "\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {nvapi_key}\",\n", + " \"Accept\": \"application/json\",\n", + " }\n", + "\n", + " base64_str = self.img2base64_string(img_path)\n", + " prompt = \"\"\"\\\n", + " can you summarize what is in the image\\\n", + " and return the answer \\\n", + " \"\"\"\n", + " payload = {\n", + " \"messages\":[\n", + " {\"role\": \"user\", \"content\": [\n", + " {\"type\": \"text\", \"text\": prompt},\n", + " {\"type\": \"image_url\", \"image_url\": f\"data:image/png;base64,{base64_str}\"} # or image/jpeg\n", + " ]\n", + " },\n", + " ],\n", + " \"temperature\": 0.2,\n", + " \"top_p\": 0.7,\n", + " \"max_tokens\": 512,\n", + " \"stream\": False\n", + " }\n", + "\n", + " # re-use connections\n", + " session = requests.Session()\n", + "\n", + " response = session.post(invoke_url, headers=headers, json=payload)\n", + "\n", + " while response.status_code == 202:\n", + " request_id = response.headers.get(\"NVCF-REQID\")\n", + " fetch_url = fetch_url_format + request_id\n", + " response = session.get(fetch_url, headers=headers)\n", + "\n", + " response.raise_for_status()\n", + " response_body = response.json()\n", + " print(response_body)\n", + " return response_body['choices'][0]['message']['content']\n", + "\n", + " def _arun(self, query: str):\n", + " raise NotImplementedError(\"This tool does not support async\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "fb59c38a", + "metadata": {}, + "source": [ + "### Step 5 - initaite the agent with tools we previously defined " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92047c5e", + "metadata": {}, + "outputs": [], + "source": [ + "#initialize the gent\n", + "tools = [ImageCaptionTool(),TabularPlotTool()]\n", + "\n", + "conversational_memory = ConversationBufferWindowMemory(\n", + " memory_key='chat_history',\n", + " k=5,\n", + " return_messages=True\n", + ")\n", + "\n", + "\n", + "agent = initialize_agent(\n", + " agent=\"chat-conversational-react-description\",\n", + " tools=tools,\n", + " llm=llm,\n", + " max_iterations=5,\n", + " verbose=True,\n", + " memory=conversational_memory,\n", + " handle_parsing_errors=True,\n", + " early_stopping_method='generate'\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "07766583", + "metadata": {}, + "source": [ + "### Step 6 - verify the agent can indeed use the tools with the supplied image and query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05adfb58", + "metadata": {}, + "outputs": [], + "source": [ + "user_question = \"What is in this image?\" \n", + "img_path=\"./toy_data/jordan.png\"\n", + "response = 
agent.run(f'{user_question}, this is the image path: {img_path}')\n", + "print(response)\n" + ] + }, + { + "cell_type": "markdown", + "id": "d5568dcb", + "metadata": {}, + "source": [ + "### Step 7 - wrap the agent into a simple gradio UI so we can interactively upload arbitrary image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f80f2fb", + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "ImageCaptionApp = gr.Interface(fn=agent,\n", + " inputs=[gr.Image(label=\"Upload image\", type=\"filepath\")],\n", + " outputs=[gr.Textbox(label=\"Caption\")],\n", + " title=\"Image Captioning with langchain agent\",\n", + " description=\"combine langchain agent using tools for image reasoning\",\n", + " allow_flagging=\"never\")\n", + " \n", + "ImageCaptionApp.launch(share=True)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Dockerfile.gpu_notebook b/notebooks/Dockerfile.gpu_notebook new file mode 100644 index 000000000..6eb23b2f2 --- /dev/null +++ b/notebooks/Dockerfile.gpu_notebook @@ -0,0 +1,34 @@ +# Use a base image with Python +FROM nvcr.io/nvidia/pytorch:23.05-py3 + +# Set working directory +WORKDIR /app + +#COPY notebooks +COPY ./notebooks/*.ipynb notebooks/ + +RUN mkdir -p /app/imgs + +COPY ./notebooks/dataset.zip . +COPY ./notebooks/toy_data/* notebooks/toy_data/ + +COPY ./notebooks/imgs/* notebooks/imgs/ + +COPY ./integrations/langchain/llms/triton_trt_llm.py . + +COPY ./integrations/langchain/llms/nv_aiplay.py . + +COPY ./notebooks/requirements.txt . + +# Run pip dependencies +RUN pip3 install -r requirements.txt + +RUN apt-get update && apt-get install -y unzip wget git libgl1-mesa-glx libglib2.0-0 + +RUN pip3 install accelerate transformers==4.33.1 --upgrade + +# Expose port 8888 for JupyterLab +EXPOSE 8888 + +# Start JupyterLab when the container runs +CMD ["jupyter", "lab", "--allow-root", "--ip=0.0.0.0","--NotebookApp.token=''", "--port=8888"] diff --git a/notebooks/imgs/inference_runtime.png b/notebooks/imgs/inference_runtime.png new file mode 100644 index 000000000..e6462a73a Binary files /dev/null and b/notebooks/imgs/inference_runtime.png differ diff --git a/notebooks/imgs/preprocessing.png b/notebooks/imgs/preprocessing.png new file mode 100644 index 000000000..881c340f0 Binary files /dev/null and b/notebooks/imgs/preprocessing.png differ diff --git a/notebooks/imgs/visual_reasoning.png b/notebooks/imgs/visual_reasoning.png new file mode 100644 index 000000000..bcc99060f Binary files /dev/null and b/notebooks/imgs/visual_reasoning.png differ diff --git a/notebooks/toy_data/Sweden.txt b/notebooks/toy_data/Sweden.txt new file mode 100644 index 000000000..2e480f658 --- /dev/null +++ b/notebooks/toy_data/Sweden.txt @@ -0,0 +1,400 @@ +Sweden, formally the Kingdom of Sweden, is a Nordic country located on the Scandinavian Peninsula in Northern Europe. It borders Norway to the west and north, Finland to the east, and is connected to Denmark in the southwest by a bridge–tunnel across the Öresund. 
At 447,425 square kilometres (172,752 sq mi), Sweden is the largest Nordic country, the third-largest country in the European Union, and the fifth-largest country in Europe. The capital and largest city is Stockholm. Sweden has a total population of 10.5 million, and a low population density of 25.5 inhabitants per square kilometre (66/sq mi), with around 87% of Swedes residing in urban areas, which cover 1.5% of the entire land area, in the central and southern half of the country. +Nature in Sweden is dominated by forests and many lakes, including some of the largest in Europe. Many long rivers run from the Scandes range through the landscape, primarily emptying into the northern tributaries of the Baltic Sea. It has an extensive coastline and most of the population lives near a major body of water. With the country ranging from 55°N to 69°N, the climate of Sweden is diverse due to the length of the country. The usual conditions are mild for the latitudes with a maritime south, continental centre and subarctic north. Snow cover is variable in the densely populated south, but reliable in higher latitudes. Furthermore, the rain shadow of the Scandes results in quite dry winters and sunny summers in much of the country. +Germanic peoples have inhabited Sweden since prehistoric times, emerging into history as the Geats (Swedish: Götar) and Swedes (Svear) and constituting the sea peoples known as the Norsemen. An independent Swedish state emerged during the early 12th century. After the Black Death in the middle of the 14th century killed about a third of the Scandinavian population, the dominance of the Hanseatic League in Northern Europe threatened Scandinavia economically and politically. This led to the formation of the Scandinavian Kalmar Union in 1397, which Sweden left in 1523. When Sweden became involved in the Thirty Years' War on the Protestant side, an expansion of its territories began, forming the Swedish Empire, which remained one of the great powers of Europe until the early 18th century. +Swedish territories outside the Scandinavian Peninsula were gradually lost during the 18th and 19th centuries, ending with the annexation of present-day Finland by Russia in 1809. The last war in which Sweden was directly involved was in 1814 when Norway was militarily forced into a personal union, which peacefully dissolved in 1905. In 2014, Sweden celebrated 200 years of peace, a longer span of peacetime than even Switzerland. Sweden maintained an official policy of neutrality during wartime and non-participation in military alliances during peacetime, although Sweden secretly relied on U.S. nuclear submarines during the Cold War. Sweden has since 2008 joined EU battlegroups, provided intelligence to NATO and since 2009 openly moved towards cooperation with NATO. In 2022, following the Russian invasion of Ukraine, Sweden announced its intent to join NATO. +Sweden is a highly developed country ranked seventh in the Human Development Index, it is a constitutional monarchy and a parliamentary democracy, with legislative power vested in the 349-member unicameral Riksdag. It is a unitary state, currently divided into 21 counties and 290 municipalities. Sweden maintains a Nordic social welfare system that provides universal health care and tertiary education for its citizens. It has the world's 14th highest GDP per capita and ranks very highly in quality of life, health, education, protection of civil liberties, economic competitiveness, income equality, gender equality and prosperity. 
Sweden joined the European Union on 1 January 1995 but rejected Eurozone membership following a referendum. It is also a member of the United Nations, the Nordic Council, the Council of Europe, the World Trade Organization and the Organisation for Economic Co-operation and Development (OECD). + + +== Etymology == + +The name for Sweden is generally agreed to derive from the Proto-Indo-European root *s(w)e, meaning "one's own", referring to one's own tribe from the tribal period. The native Swedish name, Sverige (a compound of the words Svea and rike, with lenition of the consonant [k], first recorded in the cognate Swēorice in Beowulf), translates as "realm of the Swedes", which excluded the Geats in Götaland. +The contemporary English variation was derived in the 17th-century from Middle Dutch and Middle Low German. As early as 1287, references are found in Middle Dutch referring to a lande van sweden ("land of [the] Swedes"), with swede as the singular form. In Old English the country was known as Swéoland or Swíoríce, and in Early Modern English as Swedeland. Some Finnic languages, such as Finnish and Estonian, use the terms Ruotsi and Rootsi; these variations refer to the Rus' people who inhabited the coastal areas of Roslagen in Uppland and who gave their name to Russia. + + +== History == + + +=== Prehistory === + +Sweden's prehistory begins in the Allerød oscillation, a warm period around 12,000 BC, with Late Palaeolithic reindeer-hunting camps of the Bromme culture at the edge of the ice in what is now the country's southernmost province, Scania. This period was characterised by small clans of hunter-gatherers who relied on flint technology.Sweden and its people were first described by Publius Cornelius Tacitus in his written work Germania (98 AD). In Germania 44 and 45 he mentions the Swedes (Suiones) as a powerful tribe (distinguished not merely for their arms and men, but for their powerful fleets) with ships that had a prow at each end (longships). Which kings (*kuningaz) ruled these Suiones is unknown, but Norse mythology presents a long line of legendary and semi-legendary kings going back to the last centuries BC. As for literacy in Sweden itself, the runic script was in use among the south Scandinavian elite by at least the second century AD, but all that has come down to the present from the Roman Period is curt inscriptions on artefacts, mainly of male names, demonstrating that the people of south Scandinavia spoke Proto-Norse at the time, a language ancestral to Swedish and other North Germanic languages.In the sixth century, Jordanes names two tribes living in Scandza, both of which are now considered to be synonymous with the Swedes: the Suetidi and Suehans. Suetidi is considered to be the Latin form of Svíþjóð, the Old Norse name for the Swedes. Jordanes describes the Suetidi and Dani as being of the same stock and the tallest of people. He later mentions other Scandinavian tribes as being of a same stature. The Suehans were known to the Roman world as suppliers of black fox skins and, according to Jordanes, had very fine horses, similar to those of the Thyringi of Germania (alia vero gens ibi moratur Suehans, quae velud Thyringi equis utuntur eximiis). + + +=== Vikings === + +The Swedish Viking Age lasted roughly from the eighth century to the 11th century. It is believed that Swedish Vikings and Gutar mainly travelled east and south, going to Finland, Estonia, the Baltic countries, Russia, Belarus, Ukraine, the Black Sea and even as far as Baghdad. 
Their routes passed through the Dnieper south to Constantinople, on which they carried out numerous raids. The Byzantine Emperor Theophilos noticed their great skills in war, and invited them to serve as his personal bodyguard, known as the Varangian Guard. The Swedish Vikings, called Rus are believed to be the founding fathers of Kievan Rus'. The Arab traveller Ibn Fadlan described these Vikings saying: + +I have seen the Rus as they came on their merchant journeys and encamped by the Itil. I have never seen more perfect physical specimens, tall as date palms, blond and ruddy; they wear neither tunics nor caftans, but the men wear a garment which covers one side of the body and leaves a hand free. Each man has an axe, a sword, and a knife, and keeps each by him at all times. The swords are broad and grooved, of Frankish sort. + +The actions of these Swedish Vikings are commemorated on many runestones in Sweden, such as the Greece runestones and the Varangian runestones. There was also considerable participation in expeditions westwards, which are commemorated on stones such as the England runestones. The last major Swedish Viking expedition appears to have been the ill-fated expedition of Ingvar the Far-Travelled to Serkland, the region south-east of the Caspian Sea. Its members are commemorated on the Ingvar runestones, none of which mentions any survivor. What happened to the crew is unknown, but it is believed that they died of sickness. + + +=== Kingdom of Sweden === +It is not known when and how the kingdom of Sweden was born, but the list of Swedish monarchs is drawn from the first kings known to have ruled both Svealand (Sweden) and Götaland (Gothia) as one province, beginning with Eric the Victorious. Sweden and Gothia were two separate nations long before that and since antiquity. It is not known how long they existed: the epic poem Beowulf describes semi-legendary Swedish-Geatish wars in the sixth century. Götaland in this sense mainly includes the provinces of Östergötland (East Gothia) and Västergötland (West Gothia). The island of Gotland was disputed by other than Swedes, at this time (Danish, Hanseatic, and Gotland-domestic). Småland was at that time of little interest to anyone due to the deep pine forests, and only the city of Kalmar with its castle was of importance. The south-west parts of the Scandinavian peninsula consisted of three Danish provinces (Scania, Blekinge and Halland). North of Halland, Denmark had a direct border to Norway and its province Bohuslän. But there were Swedish settlements along the southern coastline of Norrland. + +During the early stages of the Scandinavian Viking Age, Ystad in the Danish province Scania and Paviken on Gotland were flourishing centres of trade, but they were not parts of the early Swedish Kingdom. Remains of what is believed to have been a large market dating from 600 to 700 CE have been found in Ystad. In Paviken, an important centre of trade in the Baltic region during the ninth and tenth century, remains have been found of a large Viking Age harbour with shipbuilding yards and handicraft industries. Between 800 and 1000, trade brought an abundance of silver to Gotland, and according to some scholars, the Gotlanders of this era hoarded more silver than the rest of the population of Scandinavia combined. + +Saint Ansgar is usually credited with introducing Christianity to Sweden in 829, but the new religion did not begin to fully replace paganism until the 12th century. 
During the 11th century, Christianity became the prevalent religion, and from 1050 Sweden is counted as a Christian nation. The period between 1100 and 1400 was characterised by internal power struggles and competition among the Nordic kingdoms. In the years 1150–1293, according to the legend of Eric IX and the Eric Chronicles, Swedish kings made a first, second and third crusade to pagan Finland against Finns, Tavastians, and Karelians, and started conflicts with the Rus', who no longer had any connection with Sweden. The Swedish colonisation of the coastal areas of Finland also started during the 12th and 13th centuries. In the 14th century, the colonisation began to be more organised, and by the end of the century, several of the coastal areas of Finland were inhabited mostly by Swedes. + +Except for the provinces of Scania, Blekinge and Halland in the south-west of the Scandinavian peninsula, which were parts of the Kingdom of Denmark during this time, feudalism never developed in Sweden as it did in the rest of Europe. As a result, the peasantry remained largely a class of free farmers throughout most of Swedish history. Slavery (also called thralldom) was not common in Sweden, and what slavery there was tended to be driven out of existence by the spread of Christianity, by the difficulty of obtaining slaves from lands east of the Baltic Sea, and by the development of cities before the 16th century. Indeed, both slavery and serfdom were abolished altogether by a decree of King Magnus IV in 1335. Former slaves tended to be absorbed into the peasantry, and some became labourers in the towns. Still, Sweden remained a poor and economically backward country in which barter was the primary means of exchange. For instance, the farmers of the province of Dalsland would transport their butter to the mining districts of Sweden and exchange it there for iron, which they would then take to the coast and trade for fish, which they consumed, while the iron would be shipped abroad. In the middle of the 14th century, Sweden was struck by the Black Death. The population of Sweden and most of Europe was decimated. The population (in the same territory) did not reach the numbers of the year 1348 again until the beginning of the 19th century. One third of the population died during the period of 1349–1351. During this period, the Swedish cities began to acquire greater rights and were strongly influenced by German merchants of the Hanseatic League, active especially at Visby. In 1319, Sweden and Norway were united under King Magnus Eriksson, and in 1397 Queen Margaret I of Denmark effected the personal union of Sweden, Norway, and Denmark through the Kalmar Union. However, Margaret's successors, whose rule was also centred in Denmark, were unable to control the Swedish nobility. + +Many times the Swedish crown was inherited by child kings over the course of the kingdom's existence; consequently, real power was held for long periods by regents (notably those of the Sture family) chosen by the Swedish parliament. King Christian II of Denmark, who asserted his claim to Sweden by force of arms, ordered a massacre of Swedish nobles in Stockholm in 1520. This came to be known as the "Stockholm blood bath" and stirred the Swedish nobility to new resistance and, on 6 June (now Sweden's national holiday) in 1523, they made Gustav Vasa their king. This is sometimes considered the foundation of modern Sweden. Shortly afterwards the new king rejected Catholicism and led Sweden into the Protestant Reformation.
+The Hanseatic League had been officially formed at Lübeck on the Baltic coast of Northern Germany in 1356. The League sought civil and commercial privileges from the princes and royalty of the countries and cities along the coasts of the Baltic Sea. In exchange, they offered a certain amount of protection to the joining cities. Having their own navy, the Hansa were able to sweep the Baltic Sea free of pirates. The privileges obtained by the Hansa included assurances that only Hansa citizens would be allowed to trade from the ports where they were located. They sought agreement to be free of all customs and taxes. With these concessions, Lübeck merchants flocked to Stockholm, where they soon came to dominate the city's economic life and made the port city of Stockholm into the leading commercial and industrial city of Sweden. Under the Hanseatic trade, two-thirds of Stockholm's imports consisted of textiles, while the remaining third was salt. The main exports from Sweden were iron and copper. However, the Swedes began to resent the monopoly trading position of the Hansa (mostly consisting of German citizens), and to resent the income they felt they lost to the Hansa. Consequently, when Gustav Vasa or Gustav I broke the monopoly power of the Hanseatic League, he was regarded as a hero by the Swedish people. History now views Gustav I as the father of the modern Swedish nation. The foundations laid by Gustav would take time to develop. Furthermore, when Sweden did develop, freed itself from the Hanseatic League, and entered its golden era, the fact that the peasantry had traditionally been free meant that more of the economic benefits flowed back to them rather than going to a feudal landowning class. The end of the 16th century was marked by a final phase of rivalry between the remaining Catholics and the new Protestant communities. In 1592, Gustav Vasa's Catholic grandson and king of Poland, Sigismund, ascended the Swedish throne. He sought to strengthen Rome's influence by initiating the Counter-Reformation and created a dual monarchy, which temporarily became known as the Polish-Swedish Union. His despotic rule, strongly characterised by intolerance towards the Protestants, sparked a civil war that plunged Sweden into poverty. In opposition, Sigismund's uncle and successor, Charles Vasa, summoned the Uppsala Synod in 1593, which officially confirmed the modern Church of Sweden as Lutheran. Following his deposition in 1599, Sigismund attempted to reclaim the throne at any expense, and hostilities between Poland and Sweden continued for the next one hundred years. + + +=== Swedish Empire === + +During the 17th century, Sweden emerged as a European great power. Before the emergence of the Swedish Empire, Sweden was a poor and sparsely populated country on the fringe of European civilisation, with no significant power or reputation. Sweden rose to prominence on a continental scale during the reign of King Gustavus Adolphus, seizing territories from Russia and the Polish–Lithuanian Commonwealth in multiple conflicts, including the Thirty Years' War. During the Thirty Years' War, Sweden conquered approximately half of the Holy Roman states and defeated the Imperial army at the Battle of Breitenfeld in 1631. Gustavus Adolphus planned to become the new Holy Roman Emperor, ruling over a united Scandinavia and the Holy Roman states, but he was killed at the Battle of Lützen in 1632.
After the Battle of Nördlingen in 1634, Sweden's only significant military defeat of the war, pro-Swedish sentiment among the German states faded. These German provinces broke away from Swedish power one by one, leaving Sweden with only a few northern German territories: Swedish Pomerania, Bremen-Verden and Wismar. From 1643 to 1645, during the last years of the war, Sweden and Denmark-Norway fought the Torstenson War. The result of that conflict and the conclusion of the Thirty Years' War helped establish postwar Sweden as a major force in Europe. + +In the middle of the 17th century, Sweden was the third-largest country in Europe by land area, surpassed by only Russia and Spain. Sweden reached its largest territorial extent under the rule of Charles X after the Treaty of Roskilde in 1658, following Charles X's risky but successful crossing of the Danish Belts. The foundation of Sweden's success during this period is credited to Gustav I's major changes to the Swedish economy in the 16th century, and his introduction of Protestantism. In the 17th century, Sweden was engaged in many wars, for example with Poland–Lithuania, with both sides competing for territories of today's Baltic states, with Sweden suffering a notable defeat at the Battle of Kircholm. One-third of the Finnish population died in the devastating Great Famine of 1695–1697 that struck the country. Famine also hit Sweden, killing roughly 10% of Sweden's population. The Swedes conducted a series of invasions into the Polish–Lithuanian Commonwealth, known as the Deluge. After more than half a century of almost constant warfare, the Swedish economy had deteriorated. It became the lifetime task of Charles X's son, Charles XI, to rebuild the economy and refit the army. His legacy to his son, the coming ruler of Sweden, Charles XII, was one of the finest arsenals in the world, a large standing army and a great fleet. Russia, the most serious threat to Sweden at this time, had a larger army but lagged far behind in both equipment and training. After the Battle of Narva in 1700, one of the first battles of the Great Northern War, the Russian army was so severely devastated that Sweden had an open chance to invade Russia. However, Charles XII did not pursue the Russian army, instead turning against Poland and defeating the Polish king, Augustus II the Strong, and his Saxon allies at the Battle of Kliszów in 1702. This gave Russia time to rebuild and modernise its army. + +After the success of invading Poland, Charles XII decided to make an attempt at invading Russia, but this ended in a decisive Russian victory at the Battle of Poltava in 1709. After a long march exposed to Cossack raids, the Russian Tsar Peter the Great's scorched-earth techniques and the extremely cold winter of 1709, the Swedes stood weakened with shattered morale and were enormously outnumbered against the Russian army at Poltava. The defeat meant the beginning of the end for the Swedish Empire. In addition, the plague raging in East Central Europe devastated the Swedish dominions and reached Central Sweden in 1710. Returning to Sweden in 1715, Charles XII launched two campaigns against Norway in 1716 and 1718, respectively. During the second attempt, he was shot to death during the siege of Fredriksten fortress. The Swedes were not militarily defeated at Fredriksten, but the whole structure and organisation of the campaign fell apart with the king's death, and the army withdrew.
+Forced to cede large areas of land in the Treaty of Nystad in 1721, Sweden also lost its place as an empire and as the dominant state on the Baltic Sea. With Sweden's lost influence, Russia emerged as an empire and became one of Europe's dominant nations. As the war finally ended in 1721, Sweden had lost an estimated 200,000 men, 150,000 of those from the area of present-day Sweden and 50,000 from the Finnish part of Sweden. In the 18th century, Sweden did not have enough resources to maintain its territories outside Scandinavia, and most of them were lost, culminating with the loss in 1809 of eastern Sweden to Russia, which became the highly autonomous Grand Principality of Finland in Imperial Russia. In the interest of re-establishing Swedish dominance in the Baltic Sea, Sweden allied itself against its traditional ally and benefactor, France, in the Napoleonic Wars. However, in 1810, a French Marshal, Jean-Baptiste Bernadotte, was chosen as heir presumptive to the decrepit Charles XIII; in 1818, he established the House of Bernadotte, taking the regnal name of Charles XIV. Sweden's role in the Battle of Leipzig gave it the authority to force Denmark–Norway, an ally of France, to cede Norway to the King of Sweden on 14 January 1814 in exchange for the northern German provinces, at the Treaty of Kiel. The Norwegian attempts to keep their status as a sovereign state were rejected by the Swedish king, Charles XIII. He launched a military campaign against Norway on 27 July 1814, ending in the Convention of Moss, which forced Norway into a personal union with Sweden under the Swedish crown, which lasted until 1905. The 1814 campaign was the last time Sweden was at war. + + +=== Modern history === + +The Swedish East India Company, Ostindiska Kompaniet, began in 1731. The obvious choice of home port was Gothenburg on Sweden's west coast, where the mouth of the Göta älv river is very wide and has the country's largest and best harbour for high-seas journeys. The trade continued into the 19th century, and caused the little town to become Sweden's second city. +There was a significant population increase during the 18th and 19th centuries, which the writer Esaias Tegnér in 1833 attributed to "the peace, the smallpox vaccine, and the potatoes". Between 1750 and 1850, the population in Sweden doubled. According to some scholars, mass emigration to America became the only way to prevent famine and rebellion; over 1% of the population emigrated annually during the 1880s. Nevertheless, Sweden remained poor, retaining a nearly entirely agricultural economy even as Denmark and Western European countries began to industrialise. + +Many looked towards America for a better life during this time. It is thought that between 1850 and 1910 more than one million Swedes moved to the United States. In the early 20th century, more Swedes lived in Chicago than in Gothenburg (Sweden's second largest city). Most Swedish immigrants moved to the midwestern United States, with a large population in Minnesota, with a few others moving to other parts of the United States and Canada. +Despite the slow rate of industrialisation into the 19th century, many important changes were taking place in the agrarian economy due to constant innovations and a rapid population growth. These innovations included government-sponsored programmes of enclosure, aggressive exploitation of agricultural lands, and the introduction of new crops such as the potato.
Because the Swedish peasantry had never been enserfed as elsewhere in Europe, the Swedish farming culture began to take on a critical role in Swedish politics, which has continued through modern times with the modern Agrarian Party (now called the Centre Party). Between 1870 and 1914, Sweden began developing the industrialised economy that exists today. Strong grassroots movements sprang up in Sweden during the latter half of the 19th century (trade unions, temperance groups, and independent religious groups), creating a strong foundation of democratic principles. In 1889, the Swedish Social Democratic Party was founded. These movements precipitated Sweden's transition into a modern parliamentary democracy, achieved by the time of World War I. As the Industrial Revolution progressed during the 20th century, people gradually moved into cities to work in factories and became involved in socialist unions. A communist revolution was avoided in 1917, following the re-introduction of parliamentarism, and the country was democratised. + + +=== World War I and World War II === + +Sweden was officially neutral during World War I. However, under pressure from the German Empire, it took steps that were detrimental to the Allied powers, most notably mining the Øresund channel, thus closing it to Allied shipping, and allowing the Germans to use Swedish facilities and the Swedish cipher to transmit secret messages to their overseas embassies. Sweden also allowed volunteers to fight for the White Guards alongside the Germans against the Red Guards and Russians in the Finnish Civil War, and briefly occupied Åland in cooperation with the German Empire. + +As in the First World War, Sweden remained officially neutral during World War II, although its neutrality during World War II has been disputed. Sweden was under German influence for much of the war, as ties to the rest of the world were cut off through blockades. The Swedish government felt that it was in no position to openly contest Germany, and therefore made some concessions. Sweden also supplied steel and machined parts to Germany throughout the war. The Swedish government unofficially supported Finland in the Winter War and the Continuation War by allowing volunteers and materiel to be shipped to Finland. However, Sweden supported Norwegian resistance against Germany, and in 1943 helped rescue Danish Jews from deportation to Nazi concentration camps. +During the last year of the war, Sweden began to play a role in humanitarian efforts, and many refugees, among them several thousand Jews from Nazi-occupied Europe, were rescued thanks to the Swedish rescue missions to internment camps and partly because Sweden served as a haven for refugees, primarily from the Nordic countries and the Baltic states. The Swedish diplomat Raoul Wallenberg and his colleagues ensured the safety of tens of thousands of Hungarian Jews. Nevertheless, both Swedes and others have argued that Sweden could have done more to oppose the Nazis' war efforts, even if it meant increasing the risk of occupation. + + +=== Post-war era === + +Sweden was officially a neutral country and remained outside NATO and Warsaw Pact membership during the Cold War, but privately Sweden's leadership had strong ties with the United States and other western governments. Following the war, Sweden took advantage of an intact industrial base, social stability and its natural resources to expand its industry to supply the rebuilding of Europe.
Sweden received aid under the Marshall Plan and participated in the OECD. During most of the post-war era, the country was governed by the Swedish Social Democratic Party largely in co-operation with trade unions and industry. The government actively pursued an internationally competitive manufacturing sector of primarily large corporations. Sweden was one of the founding states of the European Free Trade Area (EFTA). During the 1960s the EFTA countries were often referred to as the Outer Seven, as opposed to the Inner Six of the then-European Economic Community (EEC). Sweden, like many industrialised countries, entered a period of economic decline and upheaval following the oil embargoes of 1973–74 and 1978–79. In the 1980s several key Swedish industries were significantly restructured. Shipbuilding was discontinued, wood pulp was integrated into modernised paper production, the steel industry was concentrated and specialised, and mechanical engineering was robotised. Between 1970 and 1990, the overall tax burden rose by over 10%, and the growth was low compared with other countries in Western Europe. Eventually, the government began to spend over half of the country's gross domestic product. Swedish GDP per capita ranking declined during this time. + + +=== Recent history === + +A bursting real estate bubble caused by inadequate controls on lending combined with an international recession and a policy switch from anti-unemployment policies to anti-inflationary policies resulted in a fiscal crisis in the early 1990s. Sweden's GDP declined by around 5%. In 1992, a run on the currency caused the central bank to briefly increase interest rates to 500%. The response of the government was to cut spending and institute a multitude of reforms to improve Sweden's competitiveness, among them reducing the welfare state and privatising public services and goods. Much of the political establishment promoted EU membership, and a referendum passed with 52.3% in favour of joining the EU on 13 November 1994. Sweden joined the European Union on 1 January 1995. In a 2003 referendum the Swedish electorate voted against the country joining the euro currency. In 2006 Sweden got its first majority government in decades as the centre-right Alliance defeated the incumbent Social Democrat government. Following the rapid growth of support for the anti-immigration Sweden Democrats, and their entrance to the Riksdag in 2010, the Alliance became a minority cabinet. +Until recently Sweden remained non-aligned militarily, although it participated in some joint military exercises with NATO and some other countries, in addition to extensive cooperation with other European countries in the area of defence technology and defence industry. However, in 2022, in response to the Russian invasion of Ukraine, Sweden moved to formally join the NATO alliance. The same year, Sweden applied for NATO membership and was formally invited to join the alliance at the NATO Summit in Madrid. The secretary general of NATO, Jens Stoltenberg, spoke of a fast-track membership process of just a few weeks; however, NATO member Turkey has repeatedly hindered Sweden from joining the alliance, demanding Swedish action against the PKK and for Sweden to extradite alleged Kurdish "terrorists" to Turkey, a situation straining relations between the two countries. Turkey has maintained links with Russia since its invasion of Ukraine in 2022. Swedish export weapons were also used by the American military in Iraq.
Sweden has a long history of participating in international military operations, including Afghanistan, where Swedish troops are under NATO command, and in EU-sponsored peacekeeping operations in Kosovo, Bosnia and Herzegovina, and Cyprus. Sweden also participated in enforcing a UN-mandated no-fly zone over Libya during the Arab Spring. Sweden held the chair of the European Union from 1 July to 31 December 2009. + +In recent decades Sweden has become a more culturally diverse nation due to significant immigration; in 2013, it was estimated that 15% of the population was foreign-born, and an additional 5% of the population were born to two immigrant parents. The influx of immigrants has brought new social challenges. Violent incidents have periodically occurred, including the 2013 Stockholm riots, which broke out following the police shooting of an elderly Portuguese immigrant. In response to these violent events, the anti-immigration opposition party, the Sweden Democrats, promoted their anti-immigration policies, while the left-wing opposition blamed growing inequality caused by the centre-right government's socioeconomic policies. In 2014, Stefan Löfven (Social Democrats) won the general election and became the new Swedish Prime Minister, succeeding Fredrik Reinfeldt of the liberal conservative Moderate Party. The Sweden Democrats held the balance of power and voted the government's budget down in the Riksdag, but due to agreements between the government and the Alliance, the government was able to hang onto power. Sweden was heavily affected by the 2015 European migrant crisis, eventually forcing the government to tighten regulations of entry to the country, as Sweden received thousands of asylum seekers and migrants predominantly from Africa and the Middle East per week in autumn, overwhelming existing structures. Some of the asylum restrictions were relaxed again later. The 2018 general election saw the Red-Greens lose seats to the right-wing Sweden Democrats and to the centre-right parties of the former Alliance. Despite holding only 33% of the seats in the Riksdag, the Social Democrats and the Greens managed to form a minority government, led by Prime Minister Stefan Löfven, in January 2019, relying on supply and confidence from the Centre Party, Liberals and the Left Party. In August 2021, Prime Minister Stefan Löfven announced his resignation, and finance minister Magdalena Andersson was elected as the new head of Sweden's ruling Social Democrats in November 2021. On 30 November 2021, Magdalena Andersson became Sweden's first female prime minister. She formed a minority government made up of only her Social Democrats. Her plan for forming a new coalition government with the Green Party was unsuccessful because her budget proposal failed to pass. The September 2022 general election ended in a narrow win for a bloc of right-wing parties, leading to the resignation of Magdalena Andersson's government. On 18 October 2022, Ulf Kristersson of the Moderate Party became the new Prime Minister of Sweden. Kristersson's Moderates formed a centre-right coalition with the Christian Democrats and the Liberals. The new government is backed by the biggest right-wing party, the Sweden Democrats (SD), led by Jimmie Åkesson, with tougher immigration policies forming a crucial part of the policy deal with the SD. + + +== Geography == + +Situated in Northern Europe, Sweden lies west of the Baltic Sea and Gulf of Bothnia, providing a long coastline, and forms the eastern part of the Scandinavian Peninsula.
To the west is the Scandinavian mountain chain (Skanderna), a range that separates Sweden from Norway. Finland is located to its north-east. It has maritime borders with Denmark, Germany, Poland, Russia, Lithuania, Latvia and Estonia, and it is also linked to Denmark (south-west) by the Öresund Bridge. Its border with Norway (1,619 km long) is the longest uninterrupted border within Europe. +Sweden lies between latitudes 55° and 70° N, and mostly between longitudes 11° and 25° E (part of Stora Drammen island is just west of 11°). + +At 449,964 km2 (173,732 sq mi), Sweden is the 55th-largest country in the world, the fifth-largest country in Europe, and the largest country in Northern Europe. The lowest elevation in Sweden is in the bay of Lake Hammarsjön, near Kristianstad, at −2.41 m (−7.91 ft) below sea level. The highest point is Kebnekaise at 2,111 m (6,926 ft) above sea level. +Sweden has 25 provinces or landskap, based on culture, geography and history. While these provinces serve no political or administrative purpose, they play an important role in people's self-identity. The provinces are usually grouped together into three large lands (parts): the northern Norrland, the central Svealand and the southern Götaland. The sparsely populated Norrland encompasses almost 60% of the country. Sweden also has the Vindelfjällen Nature Reserve, one of the largest protected areas in Europe, totalling 562,772 ha (approx. 5,628 km2). +About 15% of Sweden lies north of the Arctic Circle. Southern Sweden is predominantly agricultural, with increasing forest coverage northward. Around 65% of Sweden's total land area is covered with forests. The highest population density is in the Öresund Region in southern Sweden, along the western coast up to central Bohuslän, and in the valley of lake Mälaren and Stockholm. Gotland and Öland are Sweden's largest islands; Vänern and Vättern are its largest lakes. Vänern is the third largest in Europe, after Lake Ladoga and Lake Onega in Russia. Combined with the third- and fourth-largest lakes Mälaren and Hjälmaren, these lakes take up a significant part of southern Sweden's area. Sweden's extensive waterway availability throughout the south was exploited with the building of the Göta Canal in the 19th century, shortening the potential distance between the Baltic Sea south of Norrköping and Gothenburg by using the lake and river network to facilitate the canal. Sweden also has plenty of long rivers draining the lakes. Northern and Central Sweden have several wide rivers known as älvar, commonly sourced within the Scandinavian Mountains. The longest river is Klarälven-Göta älv, which originates in Trøndelag in central Norway, running 1,160 kilometres (720 mi) before it enters the sea at Gothenburg. Dalälven and the Torne are the second and third longest rivers in the country. Torne marks a large part of the Finland border. In southern Sweden, narrower rivers known as åar are also common. The vast majority of municipal seats are set either on the sea, a river or a lake, and the majority of the country's population live in coastal municipalities. + + +=== Climate === + +Most of Sweden has a temperate climate, despite its northern latitude, with largely four distinct seasons and mild temperatures throughout the year. The winter in the far south is usually mild and is manifested only through some shorter periods with snow and sub-zero temperatures; autumn may well turn into spring there, without a distinct period of winter.
The northern parts of the country have a subarctic climate while the central parts have a humid continental climate. The coastal south can be defined as having either a humid continental climate using the 0 °C isotherm, or an oceanic climate using the −3 °C isotherm. +Due to the increased maritime moderation in the peninsular south, temperature differences between the coastlines of the southernmost and northernmost regions are about 2 °C (4 °F) in summer and 10 °C (18 °F) in winter. This grows further when comparing areas in the northern interior, where the winter difference in the far north is about 15 °C (27 °F) throughout the country. The warmest summers usually happen in the Mälaren Valley around Stockholm due to the vast landmass shielding the middle east coast from Atlantic low-pressure systems in July compared to the south and west. Daytime highs in Sweden's municipal seats vary from 19 °C (66 °F) to 24 °C (75 °F) in July and −9 °C (16 °F) to 3 °C (37 °F) in January. The colder temperatures are influenced by the higher elevation in the northern interior. At sea level instead, the coldest average highs range from 21 °C (70 °F) to −6 °C (21 °F). As a result of the mild summers, the arctic region of Norrbotten has some of the northernmost agriculture in the world. Sweden is much warmer and drier than other places at a similar latitude, and even somewhat farther south, mainly because of the combination of the Gulf Stream and the general west wind drift, caused by the direction of planet Earth's rotation. Sweden has much milder winters than many parts of Russia, Canada, and the northern United States. Because of Sweden's high latitude, the length of daylight varies greatly. North of the Arctic Circle, the sun never sets for part of each summer, and it never rises for part of each winter. In the capital, Stockholm, daylight lasts for more than 18 hours in late June but only around 6 hours in late December. Sweden receives between 1,100 and 1,900 hours of sunshine annually. + +The highest temperature ever recorded in Sweden was 38 °C (100 °F) in Målilla in 1947, while the coldest temperature ever recorded was −52.6 °C (−62.7 °F) in Vuoggatjålme on 2 February 1966. Temperatures expected in Sweden are heavily influenced by the large Fennoscandian landmass, as well as continental Europe and western Russia, which allows hot or cool inland air to be easily transported to Sweden. That, in turn, gives most of Sweden's southern areas warmer summers than almost everywhere in the nearby British Isles, even matching temperatures found along the continental Atlantic coast as far south as northern Spain. In winter, however, the same high-pressure systems sometimes put the entire country far below freezing temperatures. There is some maritime moderation from the Atlantic which renders the Swedish continental climate less severe than that of nearby Russia. +Apart from the ice-free Atlantic bringing marine air into Sweden tempering winters, the mildness is further explained by prevailing low-pressure systems postponing winter, with the long nights often staying above freezing in the south of the country due to the abundant cloud cover. By the time winter finally breaks through, daylight hours rise quickly, ensuring that daytime temperatures soar quickly in spring. With the greater number of clear nights, frosts remain commonplace quite far south as late as April. +The relative strength of low and high-pressure systems of marine and continental air also defines the highly variable summers.
When hot continental air hits the country, the long days and short nights frequently bring temperatures up to 30 °C (86 °F) or above even in coastal areas. Nights normally remain cool, especially in inland areas. Coastal areas can see so-called tropical nights, with temperatures above 20 °C (68 °F), due to the moderating influence of the sea during warmer summers. Summers can be cool, especially in the north of the country. Transitional seasons are normally quite extensive, and the four-season climate applies to most of Sweden's territory, except in Scania where some years do not record a meteorological winter (see table below) or in the high Lapland mountains where polar microclimates exist. +On average, most of Sweden receives between 500 and 800 mm (20 and 31 in) of precipitation each year, making it considerably drier than the global average. The south-western part of the country receives more precipitation, between 1,000 and 1,200 mm (39 and 47 in), and some mountain areas in the north are estimated to receive up to 2,000 mm (79 in). Despite their northerly locations, southern and central Sweden may have almost no snow in some winters. Most of Sweden is located in the rain shadow of the Scandinavian Mountains through Norway and north-west Sweden. The blocking of cool and wet air in summer, as well as the greater landmass, leads to warm and dry summers far north in the country, with quite warm summers at the Bothnia Bay coast at 65 degrees latitude, which is unheard of elsewhere in the world at such northerly coastlines. +It is predicted that as the Barents Sea gets less frozen in the coming winters, thus becoming "Atlantified", additional evaporation will increase future snowfalls in Sweden and much of continental Europe. + + +=== Vegetation === + +Sweden has a considerable south to north distance (stretching between the latitudes N 55:20:13 and N 69:03:36), which causes large climatic differences, especially during the winter. The related length and strength of the four seasons play a role in which plants can naturally grow in various places. Sweden is divided into five major vegetation zones. These are: + +The southern deciduous forest zone +The southern coniferous forest zone +The northern coniferous forest zone, or the Taiga +The alpine-birch zone +The bare mountain zone + +The southern deciduous forest zone, also known as the nemoral region, is a part of a larger vegetation zone which also includes Denmark and large parts of Central Europe. It has to a rather large degree become agricultural land, but larger and smaller forests still exist. The region is characterised by a large wealth of trees and shrubs. The beech is the most dominant tree, but oak can also form smaller forests. Elm at one time formed forests, but it has been heavily reduced due to Dutch elm disease. Other important trees and shrubs in this zone include hornbeam, elder, hazel, fly honeysuckle, linden (lime), spindle, yew, alder buckthorn, blackthorn, aspen, European rowan, Swedish whitebeam, juniper, European holly, ivy, dogwood, goat willow, larch, bird cherry, wild cherry, maple, ash, alder along creeks, and in sandy soil birch competes with pine. Spruce is not native, but between approximately 1870 and 1980 large areas were planted with it. The trees tend to grow too quickly when outside their native range, and the large distances between the tree rings cause poor board quality.
Later some spruce trees began to die before reaching optimal height, and many more of the coniferous trees were uprooted during cyclones. During the last 40–50 years large areas of former spruce plantings have been replanted with deciduous forest. The southern coniferous forest zone, also known as the boreo-nemoral region, is delimited by the oak's northern natural limit (limes norrlandicus) and the spruce's southern natural limit, between the southern deciduous zone and the Taiga farther north. In the southern parts of this zone the coniferous species are found, mainly spruce and pine, mixed with various deciduous trees. Birch grows largely everywhere. The beech's northern boundary crosses this zone. This is however not the case with oak and ash. Although within its natural range, planted spruce is also common, and such woods are very dense, as the spruces can grow very close together, especially in the southern areas of this vegetation zone. +The northern coniferous forest zone, or the Taiga, begins north of the natural boundary of the oak. Of deciduous species the birch is the only one of significance. Pine and spruce are dominant, but the forests become more and more sparsely grown the farther north one goes. In the extreme north it is difficult to say that the trees form true forests at all, due to the large distances between them. The alpine-birch zone, in the Scandinavian mountains, depending on both latitude and altitude, is an area where only a smaller kind of birch (Betula pubescens or B. tortuosa) can grow. Where this vegetation zone ends, no trees grow at all: the bare mountain zone. Sweden had a 2019 Forest Landscape Integrity Index mean score of 5.35/10, ranking it 103rd globally out of 172 countries. + + +== Government and politics == + + +=== Constitutional framework === + +Sweden has four fundamental laws (Swedish: grundlagar) which together form the Constitution: the Instrument of Government (Swedish: Regeringsformen), the Act of Succession (Swedish: Successionsordningen), the Freedom of the Press Act (Swedish: Tryckfrihetsförordningen), and the Fundamental Law on Freedom of Expression (Swedish: Yttrandefrihetsgrundlagen). The public sector in Sweden is divided into two parts: the legal person known as the State (Swedish: staten) and local authorities; the latter include Regional Councils (Swedish: regioner) (renamed from county councils (landsting) in 2020) and local Municipalities (Swedish: kommuner). The local authorities, rather than the State, make up the larger part of the public sector in Sweden. Regional Councils and Municipalities are independent of one another; the former merely cover a larger geographical area than the latter. The local authorities have self-rule, as mandated by the Constitution, and their own tax base. Notwithstanding their self-rule, local authorities are nevertheless in practice dependent upon the State, as the parameters of their responsibilities and the extent of their jurisdiction are specified in the Local Government Act (Swedish: Kommunallagen) passed by the Riksdag. Sweden is a constitutional monarchy, and King Carl XVI Gustaf is the head of state, but the role of the monarch is limited to ceremonial and representative functions. Under the provisions of the 1974 Instrument of Government, the King lacks any formal political power.
The King opens the annual Riksdag session, chairs the Special Council held during a change of Government, holds regular Information Councils with the Prime Minister and the Government, chairs the meetings of the Advisory Council on Foreign Affairs (Swedish: Utrikesnämnden), and receives Letters of Credence of foreign ambassadors to Sweden and signs those of Swedish ambassadors sent abroad. In addition, the King pays State Visits abroad and receives those incoming as host. Apart from strictly official duties, the King and the other members of the Royal Family undertake a variety of unofficial and other representative duties within Sweden and abroad. Legislative power is vested in the unicameral Riksdag with 349 members. General elections are held every four years, on the second Sunday of September. Legislation may be initiated by the Government or by members of the Riksdag. Members are elected on the basis of proportional representation to a four-year term. The internal workings of the Riksdag are, in addition to the Instrument of Government, regulated by the Riksdag Act (Swedish: Riksdagsordningen). The fundamental laws can be altered by the Riksdag alone; only an absolute majority with two separate votes, separated by a general election in between, is required. + +The Government (Swedish: Regeringen) operates as a collegial body with collective responsibility and consists of the Prime Minister — appointed and dismissed by the Speaker of the Riksdag (following an actual vote in the Riksdag before an appointment can be made) — and other cabinet ministers (Swedish: Statsråd), appointed and dismissed at the sole discretion of the Prime Minister. The Government is the supreme executive authority and is responsible for its actions to the Riksdag. Most of the State administrative authorities (Swedish: statliga förvaltningsmyndigheter) report to the Government, including (but not limited to) the Armed Forces, the Enforcement Authority, the National Library, the Swedish police and the Tax Agency. A unique feature of Swedish State administration is that individual cabinet ministers do not bear any individual ministerial responsibility for the performance of the agencies within their portfolio: the director-generals and other heads of government agencies report directly to the Government as a whole, and individual ministers are prohibited from interfering in matters that are to be handled by the individual agencies, unless otherwise specifically provided for in law; hence the pejorative term in Swedish political parlance, ministerstyre (English: "ministerial rule"). +The Judiciary is independent of the Riksdag, the Government and other State administrative authorities. The role of judicial review of legislation is not practised by the courts; instead, the Council on Legislation gives non-binding opinions on legality. There is no stare decisis in that courts are not bound by precedent, although it is influential. + + +=== Political parties and elections === + +The Swedish Social Democratic Party has played a leading role in Swedish politics since 1917, after the Reformists had confirmed their strength and the left-wing revolutionaries formed their own party. After 1932, most governments have been dominated by the Social Democrats. Only six general elections since World War II—1976, 1979, 1991, 2006, 2010 and 2022—have given the assembled bloc of centre-right parties enough seats in the Riksdag to form a government.
+For over 50 years, Sweden had had five parties that continually received enough votes to gain seats in the Riksdag—the Social Democrats, the Moderate Party, the Centre Party, the Liberal People's Party and the Left Party—before the Green Party became the sixth party in the 1988 election. In the 1991 election, while the Greens lost their seats, two new parties gained seats for the first time: the Christian Democrats and New Democracy. The 1994 election saw the return of the Greens and the demise of New Democracy. It was not until elections in 2010 that an eighth party, the Sweden Democrats, gained Riksdag seats. In the elections to the European Parliament, parties that have failed to pass the Riksdag threshold have managed to gain representation at that venue: the June List (2004–2009), the Pirate Party (2009–2014), and Feminist Initiative (2014–2019). + +In the 2006 general election the Moderate Party formed the centre-right Alliance for Sweden bloc and won a majority of the Riksdag seats. In the 2010 general election the Alliance contended against a unified left bloc consisting of the Social Democrats, the Greens and the Left Party. The Alliance won a plurality of 173 seats, but remained two seats short of a 175-seat majority. Nevertheless, neither the Alliance nor the left bloc chose to form a coalition with the Sweden Democrats. The 2014 general election resulted in the three centre-left parties gaining more seats than the centre-right Alliance for Sweden, with the two blocs receiving 159 and 141 seats respectively. The non-aligned Sweden Democrats more than doubled their support and won the remaining 49 seats. On 3 October 2014, Stefan Löfven formed a minority government consisting of the Social Democrats and the Greens. In August 2021, Prime Minister Stefan Löfven announced his resignation, and finance minister Magdalena Andersson was elected as the new head of Sweden's ruling Social Democrats in November 2021. On 30 November 2021, Magdalena Andersson became Sweden's first female prime minister. She formed a minority government made up of only her Social Democrats. Her plan for forming a new coalition government with the Green Party was unsuccessful; the coalition partner left after her budget proposal failed to pass. In the 2022 election, the remnants of the Alliance were able to secure a narrow majority. This was backed up by the surging Sweden Democrats becoming the second largest party. The election saw Andersson resigning from her post, with the Moderate leader Ulf Kristersson as the likely replacement. The election saw the right-wing coalition win dozens of small towns always dominated by the left, while suffering major losses in the big cities. Election turnout in Sweden has always been high by international comparison. Although it declined in recent decades, the latest elections saw an increase in voter turnout (80.11% in 2002, 81.99% in 2006, 84.63% in 2010, 85.81% in 2014 and 87.18% in 2018). Swedish politicians enjoyed a high degree of confidence from the citizens in the 1960s. However, that level of confidence has since declined steadily, and is now at a markedly lower level than in its Scandinavian neighbours. + + +=== Administrative divisions === + +Sweden is a unitary state divided into 21 regions (regioner) and 290 municipalities (kommuner). Every region corresponds to a county (län) with a number of municipalities per county. Regions and municipalities are both forms of local government but have different roles and separate responsibilities.
Health care, public transport and certain cultural institutions are administered by regional councils. Preschools, primary and secondary schooling, public water utilities, garbage disposal, elderly care and rescue services are administered by the municipalities. Gotland is a special case, being a region with only one municipality, and the functions of region and municipality are performed by the same organisation. Municipal and region government in Sweden is similar to city commission and cabinet-style council government. Both levels have legislative assemblies (municipal councils and region assemblies) of between 31 and 101 members (always an uneven number) that are elected by party-list proportional representation at general elections held every four years in conjunction with the national parliamentary elections. +Municipalities are also divided into a total of 2,512 parishes (församlingar). These have no official political responsibilities but are traditional subdivisions of the Church of Sweden and still have some importance as districts for census-taking and elections. +The Swedish central government has 21 County Administrative Boards (Swedish: länsstyrelser), which are responsible for regional state administration not assigned to other government agencies or local government. Each county administrative board is led by a County Governor (Swedish: landshövding) appointed for a term of six years. The list of previous officeholders for the counties stretches back, in most cases, to 1634, when the counties were created by Lord High Chancellor Count Axel Oxenstierna. The main responsibility of the County Administrative Board is to co-ordinate the development of the county in line with goals set by the Riksdag and Government. +There are older historical divisions, primarily the twenty-five provinces and three lands, which still retain cultural significance. + + +=== Political history === + +The actual age of the kingdom of Sweden is unknown. Establishing the age depends mostly on whether Sweden should be considered a nation when the Svear (Sweonas) ruled Svealand or if the emergence of the nation started with the Svear and the Götar (Geats) of Götaland being united under one ruler. In the first case, Svealand was first mentioned as having one single ruler in the year 98 by Tacitus, but it is almost impossible to know for how long it had been this way. However, historians usually start the line of Swedish monarchs from when Svealand and Götaland were ruled under the same king, namely Eric the Victorious (Geat) and his son Olof Skötkonung in the tenth century. These events are often described as the consolidation of Sweden, although substantial areas were conquered and incorporated later. +Earlier kings, for whom no reliable historical sources exist, can be read about in mythical kings of Sweden and semi-legendary kings of Sweden. Many of these kings are only mentioned in various sagas and blend with Norse mythology. +The title Sveriges och Götes Konung was last used for Gustaf I of Sweden, after which the title became "King of Sweden, of the Goths and of the Wends" (Sveriges, Götes och Vendes Konung) in official documentation. Up until the beginning of the 1920s, all laws in Sweden were introduced with the words, "We, the king of Sweden, of the Goths and Wends". This title was used up until 1973. The present King of Sweden, Carl XVI Gustaf, was the first monarch officially proclaimed "King of Sweden" (Sveriges Konung) with no additional peoples mentioned in his title.
+The term riksdag was used for the first time in the 1540s, although the first meeting where representatives of different social groups were called to discuss and determine affairs affecting the country as a whole took place as early as 1435, in the town of Arboga. During the Riksdag assemblies of 1527 and 1544, under King Gustav Vasa, representatives of all four estates of the realm (clergy, nobility, townsmen and peasants) were called on to participate for the first time. The monarchy became hereditary in 1544. +Executive power was historically shared between the King and an aristocratic Privy Council until 1680, followed by the King's autocratic rule initiated by the commoner estates of the Riksdag. As a reaction to the failed Great Northern War, a parliamentary system was introduced in 1719, followed by three different flavours of constitutional monarchy in 1772, 1789 and 1809, the latter granting several civil liberties. Already during the first of those three periods, the 'Era of Liberty' (1719–72), the Swedish Riksdag had developed into a very active parliament, and this tradition continued into the nineteenth century, laying the basis for the transition towards modern democracy at the end of that century. In 1866, Sweden became a constitutional monarchy with a bicameral parliament, with the First Chamber indirectly elected by local governments, and the Second Chamber directly elected in national elections every four years. In 1971 the parliament became unicameral. Legislative power was (symbolically) shared between the King and the Riksdag until 1975. Swedish taxation is controlled by the Riksdag. + +Sweden has a history of strong political involvement by ordinary people through its "popular movements" (Folkrörelser), the most notable being trade unions, the independent Christian movement, the temperance movement, the women's movement, and the intellectual property pirate movements. Sweden was the first country in the world to outlaw corporal punishment of children by their parents (parents' right to spank their own children was first removed in 1966, and it was explicitly prohibited by law from July 1979). +Sweden is currently leading the EU in statistics measuring equality in the political system and equality in the education system. The Global Gender Gap Report 2006 ranked Sweden as the number one country in terms of gender equality. Some Swedish political figures have become known worldwide, among them Raoul Wallenberg, Folke Bernadotte, the former Secretary-General of the United Nations Dag Hammarskjöld, the former Prime Minister Olof Palme, the former Prime Minister and later Foreign minister Carl Bildt, the former President of the General Assembly of the United Nations Jan Eliasson, and the former International Atomic Energy Agency Iraq inspector Hans Blix. + + +=== Judicial system === + +The courts are divided into two parallel and separate systems: the general courts (allmänna domstolar) for criminal and civil cases, and the general administrative courts (allmänna förvaltningsdomstolar) for cases relating to disputes between private persons and the authorities. Each of these systems has three tiers, where the top-tier court of the respective system will typically only hear cases that may become precedent. There are also a number of special courts, which will hear a narrower set of cases, as set down by legislation. While independent in their rulings, some of these courts are operated as divisions within the general or general administrative courts.
+ +The Supreme Court of Sweden (Swedish: Högsta domstolen) is the third and final instance in all civil and criminal cases in Sweden. Before a case can be decided by the Supreme Court, leave to appeal must be obtained, and with few exceptions, leave to appeal can be granted only when the case is of interest as a precedent. The Supreme Court consists of 16 Justices (Swedish: justitieråd), appointed by the Government, but the court as an institution is independent of the Riksdag, and the Government is not able to interfere with the decisions of the court. +According to a victimisation survey of 1,201 residents in 2005, Sweden has above-average crime rates compared to other EU countries. Sweden has high or above-average levels of assaults, sexual assaults, hate crimes, and consumer fraud. Sweden has low levels of burglary, car theft and drug problems. Bribe seeking is rare. A mid-November 2013 news report announced that four prisons in Sweden were closed during the year due to a significant drop in the number of inmates. The decrease in the number of Swedish prisoners was considered "out-of-the-ordinary" by the head of Sweden's prison and probation services, with prison numbers in Sweden falling by around 1% a year since 2004. Prisons were closed in the towns of Åby, Håja, Båtshagen, and Kristianstad. + + +=== Foreign relations === + +Throughout the 20th century, Swedish foreign policy was based on the principle of non-alignment in peacetime and neutrality in wartime. Sweden's government pursued an independent course of nonalignment in times of peace so that neutrality would be possible in the event of war. Sweden's doctrine of neutrality is often traced back to the 19th century, as the country has not been in a state of war since the end of the Swedish campaign against Norway in 1814. During World War II, Sweden joined neither the Allied nor the Axis powers. This has sometimes been disputed, since in select cases Sweden in effect allowed the Nazi regime to use its railroad system to transport troops and goods, especially iron ore from mines in northern Sweden, which was vital to the German war machine. However, Sweden also indirectly contributed to the defence of Finland in the Winter War, and permitted the training of Norwegian and Danish troops in Sweden after 1943. + +During the early Cold War era, Sweden combined its policy of non-alignment and a low profile in international affairs with a security policy based on strong national defence. The function of the Swedish military was to deter attack. At the same time, the country maintained relatively close informal connections with the Western bloc, especially in the realm of intelligence exchange. In 1952, a Swedish DC-3 was shot down over the Baltic Sea by a Soviet MiG-15 jet fighter. Later investigations revealed that the plane was actually gathering information for NATO. Another plane, a Catalina search and rescue plane, was sent out a few days later and shot down by the Soviets as well. Prime Minister Olof Palme made an official visit to Cuba during the 1970s, during which he denounced Fulgencio Batista's government and praised contemporary Cuban and Cambodian revolutionaries in a speech. +Beginning in the late 1960s, Sweden attempted to play a more significant and independent role in international relations. It involved itself significantly in international peace efforts, especially through the United Nations, and in support of the Third World.
+On 27 October 1981, a Whiskey-class submarine (U 137) from the Soviet Union ran aground close to the naval base at Karlskrona in the southern part of the country. Research has never clearly established whether the submarine ended up on the shoals through a navigational mistake or whether an enemy committed espionage against Swedish military potential. The incident triggered a diplomatic crisis between Sweden and the Soviet Union. Following the 1986 assassination of Olof Palme and with the end of the Cold War, Sweden has adopted a more traditional foreign policy approach. Nevertheless, the country remains active in peacekeeping missions and maintains a considerable foreign aid budget. +Since 1995 Sweden has been a member of the European Union, and as a consequence of a new world security situation the country's foreign policy doctrine has been partly modified, with Sweden playing a more active role in European security co-operation. In 2022, in response to Russia's invasion of Ukraine, Sweden moved to formally join the NATO alliance. The secretary general of NATO, Jens Stoltenberg, spoke of a fast-track membership process of just a few weeks; however, NATO member Turkey has repeatedly hindered Sweden from joining the alliance, demanding Swedish action against the PKK and for Sweden to extradite alleged Kurdish "terrorists" to Turkey, a situation straining relations between the two countries. Turkey has maintained links with Russia since its invasion of Ukraine in 2022. + + +=== Military === + +The law is enforced in Sweden by several government entities. The Swedish police is a Government agency concerned with police matters. The National Task Force is a national SWAT unit within the police force. The Swedish Security Service's responsibilities are counter-espionage, anti-terrorist activities, protection of the constitution and protection of sensitive objects and people. +The Försvarsmakten (Swedish Armed Forces) are a government agency reporting to the Swedish Ministry of Defence and responsible for the peacetime operation of the armed forces of Sweden. The primary task of the agency is to train and deploy peacekeeping forces abroad, while maintaining the long-term ability to refocus on the defence of Sweden in the event of war. The armed forces are divided into Army, Air Force and Navy. The head of the armed forces is the Supreme Commander (Överbefälhavaren, ÖB), the most senior commissioned officer in the country. Up to 1974, the King was pro forma Commander-in-Chief, but in reality it was clearly understood through the 20th century that the monarch would have no active role as a military leader. + +Until the end of the Cold War, nearly all males reaching the age of military service were conscripted. In recent years, the number of conscripted males has shrunk dramatically, while the number of female volunteers has increased slightly. Recruitment has generally shifted towards finding the most motivated recruits, rather than solely focusing on those otherwise most fit for service. By law, all soldiers serving abroad must be volunteers. In 1975, the total number of conscripts was 45,000. By 2003, it was down to 15,000. +On 1 July 2010, Sweden ended routine conscription, switching to an all-volunteer force unless otherwise required for defence readiness. Emphasis was to be placed on only recruiting those later prepared to volunteer for international service. The total forces gathered would consist of about 60,000 personnel.
This in comparison with the 1980s, before the fall of the Soviet Union, when Sweden could gather up to 1,000,000 servicemembers. +However, on 11 December 2014, due to tensions in the Baltic area, the Swedish Government reintroduced one part of the Swedish conscription system, refresher training. On 2 March 2017, the government decided to reintroduce the remaining part of the Swedish conscription system, basic military training. The first recruits began their training in 2018. As the law is now gender neutral, both men and women may have to serve. Sweden decided not to sign the UN treaty on the Prohibition of Nuclear Weapons.Swedish units have taken part in peacekeeping operations in the Democratic Republic of the Congo, Cyprus, Bosnia and Herzegovina, Kosovo, Liberia, Lebanon, Afghanistan and Chad. + + +== Economy == + +Sweden is the twelfth-richest country in the world in terms of GDP (gross domestic product) per capita and a high standard of living is experienced by its citizens. Sweden is an export-oriented mixed economy. Timber, hydropower and iron ore constitute the resource base of an economy with a heavy emphasis on foreign trade. Sweden's engineering sector accounts for 50% of output and exports, while telecommunications, the automotive industry and the pharmaceutical industries are also of great importance. Sweden is the ninth-largest arms exporter in the world. Agriculture accounts for 2% of GDP and employment. The country ranks among the highest for telephone and Internet access penetration.Trade unions, employers' associations and collective agreements cover a large share of the employees in Sweden. The high coverage of collective agreements is achieved despite the absence of state mechanisms extending collective agreements to whole industries or sectors. Both the prominent role of collective bargaining and the way in which the high rate of coverage is achieved reflect the dominance of self-regulation (regulation by the labour market parties themselves) over state regulation in Swedish industrial relations. When the Swedish Ghent system was changed in 2007, resulting in considerably raised fees to unemployment funds, a substantial decline in union density and density of unemployment funds occurred. + +In 2010, Sweden's income Gini coefficient was the third lowest among developed countries, at 0.25—slightly higher than Japan and Denmark—suggesting Sweden had low income inequality. However, Sweden's wealth Gini coefficient at 0.853 was the second highest in developed countries, and above European and North American averages, suggesting high wealth inequality. Even on a disposable income basis, the geographical distribution of Gini coefficient of income inequality varies within different regions and municipalities of Sweden. Danderyd, outside Stockholm, has Sweden's highest Gini coefficient of income inequality, at 0.55, while Hofors near Gävle has the lowest at 0.25. In and around Stockholm and Scania, two of the more densely populated regions of Sweden, the income Gini coefficient is between 0.35 and 0.55.In terms of structure, the Swedish economy is characterised by a large, knowledge-intensive and export-oriented manufacturing sector; an increasing, but comparatively small, business service sector; and by international standards, a large public service sector. Large organisations, both in manufacturing and services, dominate the Swedish economy. 
High and medium-high technology manufacturing accounts for 9.9% of GDP.The 20 largest (by turnover) registered Swedish companies in 2007 were Volvo, Ericsson, Vattenfall, Skanska, Sony Ericsson Mobile Communications AB, Svenska Cellulosa Aktiebolaget, Electrolux, Volvo Personvagnar, TeliaSonera, Sandvik, Scania, ICA, Hennes & Mauritz, IKEA, Nordea, Preem, Atlas Copco, Securitas, Nordstjernan and SKF. The vast majority of Sweden's industry is privately controlled, unlike many other industrialised Western countries, and, in accordance with a historical standard, publicly owned enterprises are of minor importance. + +An estimated 4.5 million Swedish residents are employed, and around a third of the workforce completed tertiary education. In terms of GDP per-hour-worked, Sweden was the world's ninth highest in 2006 at US$31, compared to US$22 in Spain and US$35 in the United States. GDP per-hour-worked is growing 2.5% per year for the economy as a whole and the trade-terms-balanced productivity growth is 2%. According to the OECD, deregulation, globalisation, and technology sector growth have been key productivity drivers. Sweden is a world leader in privatised pensions and pension funding problems are relatively small compared to many other Western European countries. A pilot program to test the feasibility of a six-hour workday, without loss of pay, will commence in 2014, involving the participation of Gothenburg municipal staff. The Swedish government is seeking to reduce its costs through decreased sick leave hours and increased efficiency. + +The typical worker receives 40% of his or her labour costs after the tax wedge. Total tax collected by Sweden as a percentage of its GDP peaked at 52.3% in 1990. The country faced a real estate and banking crisis in 1990–1991, and consequently passed tax reforms in 1991 to implement tax rate cuts and tax base broadening over time. Since 1990, taxes as a percentage of GDP collected by Sweden have been dropping, with total tax rates for the highest income earners dropping the most. In 2010 45.8% of the country's GDP was collected as taxes, the second highest among OECD countries, and nearly double the percentage in the US or South Korea. Tax income-financed employment represents a third of the Swedish workforce, a substantially higher proportion than in most other countries. Overall, GDP growth has been fast since reforms—especially those in manufacturing—were enacted in the early 1990s. + +Sweden is the fourth-most competitive economy in the world, according to the World Economic Forum in its Global Competitiveness Report 2012–2013. Sweden is the top performing country in the 2014 Global Green Economy Index (GGEI). Sweden is ranked fourth in the IMD World Competitiveness Yearbook 2013. According to the book The Flight of the Creative Class by the US economist Professor Richard Florida of the University of Toronto, Sweden is ranked as having the best creativity in Europe for business and is predicted to become a talent magnet for the world's most purposeful workers. The book compiled an index to measure the kind of creativity it claims is most useful to business—talent, technology and tolerance.Sweden maintains its own currency, the Swedish krona (SEK), a result of the Swedes having rejected the euro in a referendum. The Swedish Riksbank—founded in 1668 and thus the oldest central bank in the world—is currently focusing on price stability with an inflation target of 2%. 
According to the Economic Survey of Sweden 2007 by the OECD, the average inflation in Sweden has been one of the lowest among European countries since the mid-1990s, largely because of deregulation and quick utilisation of globalisation.The largest trade flows are with Germany, the United States, Norway, the United Kingdom, Denmark and Finland. +Financial deregulation in the 1980s adversely affected the property market, leading to a bubble and eventually a crash in the early 1990s. Commercial property prices fell by up to two thirds, resulting in two Swedish banks having to be taken over by the government. In the following two decades the property sector strengthened. By 2014, legislators, economists and the IMF were again warning of a bubble with residential property prices soaring and the level of personal mortgage debt expanding. Household debt-to-income rose above 170% as the IMF was calling on legislators to consider zoning reform and other means of generating a greater supply of housing as demand was outstripping what was available, pushing prices higher. By August 2014, 40% of home borrowers had interest-only loans while those that did not were repaying principal at a rate that would take 100 years to fully repay. + + +=== Energy === + +Sweden's energy market is largely privatised. The Nordic energy market is one of the first liberalised energy markets in Europe and it is traded in NASDAQ OMX Commodities Europe and Nord Pool Spot. In 2006, out of a total electricity production of 139 TWh, electricity from hydropower accounted for 61 TWh (44%), and nuclear power delivered 65 TWh (47%). At the same time, the use of biofuels, peat etc. produced 13 TWh (9%) of electricity, while wind power produced 1 TWh (1%). Sweden was a net importer of electricity by a margin of 6 TWh. Biomass is mainly used to produce heat for district heating and central heating and industry processes. +Sweden joined the International Energy Agency in 1974, after the 1973 oil crisis strengthened Sweden's commitment to decrease dependence on imported fossil fuels. To protect against unexpected oil supply shocks and in accordance with international commitments made through the IEA, Sweden maintains a strategic petroleum reserve of at least 90 days of net oil imports. As of February 2022, Sweden's oil reserves totalled 130 days’ worth of net imports. Sweden has moved to generate electricity mostly from hydropower and nuclear power. The use of nuclear power has been limited, however. Among other things, the accident of Three Mile Island Nuclear Generating Station (United States) prompted the Riksdag to ban new nuclear plants. In March 2005, an opinion poll showed that 83% supported maintaining or increasing nuclear power.Sweden is considered a "global leader" in decarbonisation. Politicians have made announcements about oil phase-out in Sweden, decrease of nuclear power, and multibillion-dollar investments in renewable energy and energy efficiency. The country has for many years pursued a strategy of indirect taxation as an instrument of environmental policy, including energy taxes in general and carbon dioxide taxes in particular. Sweden was the first nation to implement carbon pricing, and its carbon prices remain the highest in the world as of 2020. This model has been shown to be particularly effective at decarbonizing the nation's economy. In 2014, Sweden was net exporter of electricity by a margin of 16 TWh; the production from wind power mills had increased to 11.5 TWh. 
+ + +=== Transport === + +Sweden has 162,707 km (101,101 mi) of paved road and 1,428 km (887 mi) of expressways. Motorways run through Sweden and over the Øresund Bridge to Denmark. New motorways are still under construction and a new motorway from Uppsala to Gävle was finished on 17 October 2007. Sweden had left-hand traffic (vänstertrafik in Swedish) from approximately 1736 and continued to do so well into the 20th century. Voters rejected right-hand traffic in 1955, but after the Riksdag passed legislation in 1963 changeover took place on 3 September 1967, known in Swedish as Dagen H. +The Stockholm metro is the only underground system in Sweden and serves the city of Stockholm via 100 stations. The rail transport market is privatised, but while there are many privately owned enterprises, the largest operators are still owned by the state. The counties have financing, ticket and marketing responsibility for local trains. For other trains the operators handle tickets and marketing themselves. Operators include SJ, Veolia Transport, Green Cargo, Tågkompaniet and Inlandsbanan. Most of the railways are owned and operated by Trafikverket. + +Most tram networks were closed in 1967, as Sweden changed from left-side to right-side driving. But they survived in Norrköping, Stockholm and Gothenburg, with Gothenburg tram network being the largest. A new tram line opened in Lund on 13 December 2020. +The largest airports include Stockholm–Arlanda Airport (16.1 million passengers in 2009) 40 km (25 mi) north of Stockholm, Göteborg Landvetter Airport (4.3 million passengers in 2008), and Stockholm–Skavsta Airport (2.0 million passengers). Sweden hosts the two largest port companies in Scandinavia, Port of Göteborg AB (Gothenburg) and the transnational company Copenhagen Malmö Port AB. The most used airport for a large part of Southern Sweden is Kastrup or Copenhagen Airport which is located only 12 minutes by train from the closest Swedish railway station, Hyllie. Copenhagen Airport also is the largest international airport in Scandinavia and Finland. +Sweden also has a number of car ferry connections to several neighbouring countries. This includes a route from Umeå across the Gulf of Bothnia to Vaasa in Finland. There are several connections from the Stockholm area across the Sea of Åland to Mariehamn in Åland as well as Turku and Helsinki on the Finnish mainland and beyond to Estonia and St Petersburg in Russia. Ferry routes from the Stockholm area also connect with Ventspils and Riga in Latvia as well as Gdańsk in Poland across the Baltic Sea. The ferry ports of Karlskrona and Karlshamn in southeastern Sweden serve Gdynia, Poland, and Klaipėda, Lithuania. Ystad and Trelleborg near the southern tip of Sweden have ferry links with the Danish island of Bornholm and the German ports of Sassnitz, Rostock and Travemünde, respectively, and ferries run to Świnoujście, Poland, from both of them. Trelleborg is the busiest ferry port in Sweden in terms of weight transported by lorry. Its route to Sassnitz started as a steam-operated railway ferry in the 19th century, and today's ferry still carries trains to Berlin during the summer months. Another ferry route to Travemünde originates from Malmö. Despite the opening of the fixed link to Denmark, the Øresund Bridge, the busiest ferry route remains the short link across the narrowest section of the Øresund between Helsingborg and the Danish port of Helsingør, known as the HH Ferry route. 
There are over seventy departures a day each way; during peak times, a ferry departs every fifteen minutes. Ports higher up the Swedish west coast include Varberg, with a ferry connection across the Kattegat to Grenaa in Denmark, and Göteborg, serving Frederikshavn at the northern tip of Denmark and Kiel in Germany. Finally, there are ferries from Strömstad near the Norwegian border to destinations around the Oslofjord in Norway. There used to be ferry services to the United Kingdom from Göteborg to destinations such as Immingham, Harwich and Newcastle, but these have been discontinued.
+Sweden has two domestic ferry lines with large vessels, both connecting Gotland with the mainland. The lines leave from Visby harbour on the island, and the ferries sail to either Oskarshamn or Nynäshamn. A smaller car ferry connects the island of Ven in Øresund with Landskrona.
+
+
+=== Public policy ===
+
+Sweden has one of the most highly developed welfare states in the world. According to a 2012 OECD report, the country had the second-highest public social spending as a percentage of its GDP after France (27.3% and 28.4%, respectively), and the third-highest total (public and private) social spending at 30.2% of its GDP, after France and Belgium (31.3% and 31.0%, respectively). Sweden spent 6.3% of its GDP, the ninth-highest among 34 OECD countries, to provide equal access to education. On health care, the country spent 10.0% of its total GDP, the 12th highest. Historically, Sweden provided solid support for free trade (except agriculture) and mostly relatively strong and stable property rights (both private and public), though some economists have pointed out that Sweden promoted industries with tariffs and used publicly subsidised R&D during the country's early critical years of industrialisation. After World War II a succession of governments expanded the welfare state by raising the taxes. During this period Sweden's economic growth was also one of the highest in the industrial world. A series of successive social reforms transformed the country into one of the most equal and developed on earth. The consistent growth of the welfare state led to Swedes achieving unprecedented levels of social mobility and quality of life—to this day Sweden consistently ranks at the top of league tables for health, literacy and Human Development—far ahead of some wealthier countries (for example the United States). However, from the 1970s and onwards Sweden's GDP growth fell behind other industrialised countries and the country's per capita ranking fell from fourth to 14th place in a few decades. From the mid-1990s until today Sweden's economic growth has once again accelerated and has been higher than in most other industrialised countries (including the US) during the last 15 years. A report from the United Nations Development Program predicted that Sweden's rating on the Human Development Index will fall from 0.949 in 2010 to 0.906 in 2030. Sweden began slowing the expansion of the welfare state in the 1980s, and even trimming it back. Sweden has been relatively quick to adopt neoliberal policies, such as privatisation, financialisation and deregulation, compared to countries such as France. The current Swedish government is continuing the trend of moderate rollbacks of previous social reforms. Growth has been higher than in many other EU-15 countries. Also since the mid-1980s, Sweden has had the fastest growth in inequality of any developed nation, according to the OECD.
This has largely been attributed to the reduction in state benefits and a shift toward the privatisation of public services. According to Barbro Sorman, an activist of the opposition Left Party, "The rich are getting richer, and the poor are getting poorer. Sweden is starting to look like the USA." Nevertheless, it remains far more egalitarian than most nations. Partly as a result of these privatisations and widening economic disparity, the Swedes in the 2014 elections put the Social Democrats back in power. Sweden adopted free market agricultural policies in 1990. Since the 1930s, the agricultural sector had been subject to price controls. In June 1990, the Riksdag voted for a new agricultural policy marking a significant shift away from price controls. As a result, food prices fell somewhat. However, the liberalisations soon became moot because EU agricultural controls supervened. Since the late 1960s, Sweden has had the highest tax quota (as percentage of GDP) in the industrialised world, although today the gap has narrowed and Denmark has surpassed Sweden as the most heavily taxed country among developed countries. Sweden has a two-step progressive tax scale with a municipal income tax of about 30% and an additional high-income state tax of 20–25% when a salary exceeds roughly 320,000 SEK per year. Payroll taxes amount to 32%. In addition, a national VAT of 25% is added to many things bought by private citizens, with the exception of food (12% VAT), transportation, and books (6% VAT). Certain items are subject to additional taxes, e.g. electricity, petrol/diesel and alcoholic beverages.
+In 2007, total tax revenue was 47.8% of GDP, the second-highest tax burden among developed countries, down from 49.1% in 2006. Sweden's inverted tax wedge – the amount going to the service worker's wallet – is approximately 15%, compared to 10% in Belgium, 30% in Ireland, and 50% in the United States. Public sector spending amounts to 53% of the GDP. State and municipal employees total around a third of the workforce, much more than in most Western countries. Only Denmark has a larger public sector (38% of Danish workforce). Spending on transfers is also high.
+In 2015 and 2016, 69 per cent of the employed workers were organised in trade unions. Union density in 2016 was 62% among blue-collar workers (most of them in the Swedish Trade Union Confederation, LO) and 75% among white-collar workers (most of them in the Swedish Confederation of Professional Employees, TCO, and the Swedish Confederation of Professional Associations, SACO). Sweden has state-supported union unemployment funds (Ghent system). Trade unions have the right to elect two representatives to the board in all Swedish companies with more than 25 employees. Sweden has a relatively high amount of sick leave per worker in the OECD: the average worker loses 24 days due to sickness. The unemployment rate was 7.2% in May 2017 while the employment rate was 67.4%, with the workforce consisting of 4,983,000 people while 387,000 were unemployed. Unemployment among youth (aged 24 or younger) in 2012 was 24.2%, making Sweden the OECD country with the highest ratio of youth unemployment versus unemployment in general.
+
+
+=== Science and technology ===
+
+In the 18th century, Sweden's scientific revolution took off. Previously, technical progress had mainly come from mainland Europe.
+In 1739, the Royal Swedish Academy of Sciences was founded, with people such as Carl Linnaeus and Anders Celsius as early members.
Many of the companies founded by early pioneers still remain major international brands. Gustaf Dalén founded AGA, and received the Nobel Prize for his sun valve. Alfred Nobel invented dynamite and instituted the Nobel Prizes. Lars Magnus Ericsson started the company bearing his name, Ericsson, still one of the largest telecom companies in the world. Jonas Wenström was an early pioneer in alternating current and is along with Serbian-American inventor Nikola Tesla credited as one of the inventors of the three-phase electrical system. The traditional engineering industry is still a major source of Swedish inventions, but pharmaceuticals, electronics and other high-tech industries are gaining ground. Tetra Pak was an invention for storing liquid foods, invented by Erik Wallenberg. Losec, an ulcer medicine, was the world's best-selling drug in the 1990s and was developed by AstraZeneca. More recently Håkan Lans invented the Automatic Identification System, a worldwide standard for shipping and civil aviation navigation. A large portion of the Swedish economy is to this day based on the export of technical inventions, and many large multinational corporations from Sweden have their origins in the ingenuity of Swedish inventors. Swedish inventors held 47,112 patents in the United States in 2014, according to the United States Patent and Trademark Office. As a nation, only ten other countries hold more patents than Sweden. Combined, the public and the private sector in Sweden allocate over 3.5% of GDP to research & development (R&D) per year, making Sweden's investment in R&D as a percentage of GDP the second-highest in the world. For several decades the Swedish government has prioritised scientific and R&D activities. As a percentage of GDP, the Swedish government spends the most of any nation on research and development. Sweden tops other European countries in the number of published scientific works per capita. In 2009, the decisions to construct Sweden's two largest scientific installations, the synchrotron radiation facility MAX IV Laboratory and the European Spallation Source (ESS), were taken. Both installations will be built in Lund. The European Spallation Source, costing some SEK 14 billion to construct, will begin initial operations in 2019 with construction completion scheduled for 2025. The ESS will give an approximately 30 times stronger neutron beam than any of today's existing neutron source installations. The MAX IV, costing some SEK 3 billion, was inaugurated on 21 June 2016. Both facilities have strong implications for materials research. Sweden was ranked third in the Global Innovation Index in 2022.
+
+
+=== Waste management ===
+Sweden is known for its efficient waste management system. Only 0.7% of the total household waste is disposed of, and the rest is reused. Around 52% of its waste is used for energy production (that is, burnt) and 47% is recycled. About two million tonnes of waste are imported from neighbouring countries to make profitable recycling products. According to a 2023 report, Sweden generated 1.7 billion euros in 2020 (the highest so far was 1.98 billion euros in 2016) from recycling waste. The works are mostly executed through the public organisation, Swedish Waste Management (Avfall Sverige).
+
+
+=== Taxes ===
+
+On average, 27% of taxpayers' money in Sweden goes to education and healthcare, whereas 5% goes to the police and military, and 42% to social security. The typical worker receives 40% of his or her labour costs after the tax wedge.
Total tax collected by Sweden as a percentage of its GDP peaked at 52.3% in 1990. The country faced a real estate and banking crisis in 1990–1991, and consequently passed tax reforms in 1991 to implement tax rate cuts and tax base broadening over time. Since 1990, taxes as a percentage of GDP collected by Sweden have been dropping, with total tax rates for the highest income earners dropping the most. In 2010, 45.8% of the country's GDP was collected as taxes, the second highest among OECD countries, and nearly double the percentage in the US or South Korea.
+
+
+=== Pensions ===
+
+Every Swedish resident receives a state pension. Swedish Pensions Agency is responsible for pensions. People who have worked in Sweden, but relocated to another country, can also receive the Swedish pension. There are several types of pensions in Sweden: occupational and private pensions, and national retirement. A person can receive a combination of the various types of pensions.
+
+
+== Demographics ==
+
+The total resident population of Sweden was 10,377,781 in October 2020. The population exceeded 10 million for the first time on Friday 20 January 2017. The average population density is just over 25 people per km2 (65 per square mile), with 1,437 persons per km2 in localities (continuous settlement with at least 200 inhabitants). 87% of the population live in urban areas, which cover 1.5% of the entire land area. 63% of Swedes are in large urban areas. It is substantially higher in the south than in the north. The capital city Stockholm has a municipal population of about 950,000 (with 1.5 million in the urban area and 2.3 million in the metropolitan area). The second- and third-largest cities are Gothenburg and Malmö. Greater Gothenburg counts just over a million inhabitants and the same goes for the western part of Scania, along the Öresund. The Öresund Region, the Danish-Swedish cross-border region around the Öresund that Malmö is part of, has a population of 4 million. Outside of major cities, areas with notably higher population density include the agricultural part of Östergötland, the western coast, the area around Lake Mälaren and the agricultural area around Uppsala.
+Norrland, which covers approximately 60% of the Swedish territory, has a very low population density (below 5 people per square kilometre). The mountains and most of the remote coastal areas are almost unpopulated. Low population density exists also in large parts of western Svealand, as well as southern and central Småland. An area known as Finnveden, which is located in the south-west of Småland, and mainly below the 57th parallel, can also be considered as almost empty of people.
+Between 1820 and 1930, approximately 1.3 million Swedes, a third of the country's population at the time, emigrated to North America, and most of them to the United States. There are more than 4.4 million Swedish Americans according to a 2006 US Census Bureau estimate. In Canada, the community of Swedish ancestry is 330,000 strong. There are no official statistics on ethnicity, but according to Statistics Sweden, 2,752,572 (26%) inhabitants of Sweden were of a foreign background in 2021, defined as being born abroad or born in Sweden with both foreign-born parents. Of these inhabitants, 2,090,503 persons were born abroad and 662,069 persons were born in Sweden to parents born abroad.
In addition, 805,340 persons had one parent born abroad with the other parent born in Sweden. Sweden has one of the oldest populations in the world, with an average age of 41.1 years.
+
+
+=== Language ===
+
+The official language of Sweden is Swedish, a North Germanic language, related and very similar to Danish and Norwegian, but differing in pronunciation and orthography. Norwegians have little difficulty understanding Swedish, and Danes can also understand it, with slightly more difficulty than Norwegians. The same goes for standard Swedish speakers, who find it far easier to understand Norwegian than Danish. The dialects spoken in Scania, the southernmost part of the country, are influenced by Danish because the region traditionally was a part of Denmark and is nowadays situated close to it. Sweden Finns are Sweden's largest linguistic minority, comprising about 5% of Sweden's population, and Finnish is recognised as a minority language. Owing to a 21st-century influx of native speakers of Arabic, the use of Arabic is likely more widespread in the country than that of Finnish. However, no official statistics are kept on language use. Along with Finnish, four other minority languages are also recognised: Meänkieli, Sami, Romani, and Yiddish. Swedish became Sweden's official language on 1 July 2009, when a new language law was implemented. The issue of whether Swedish should be declared the official language had been raised in the past, and the Riksdag voted on the matter in 2005, but the proposal narrowly failed. In varying degrees, a majority of Swedes, especially those born after World War II, understand and speak English, owing to trade links, the popularity of overseas travel, a strong Anglo-American influence and the tradition of subtitling rather than dubbing foreign television shows and films, and the relative similarity of the two languages which makes learning English easier. In a 2005 survey by Eurobarometer, 89% of Swedes reported the ability to speak English. English became a compulsory subject for secondary school students studying natural sciences as early as 1849, and has been a compulsory subject for all Swedish students since the late 1940s. Depending on the local school authorities, English is currently a compulsory subject between first grade and ninth grade, with all students continuing in secondary school studying English for at least another year. Most students also study one and sometimes two additional languages. Some Danish and Norwegian is also taught as part of Swedish courses for native speakers. Because of the extensive mutual intelligibility between the three continental Scandinavian languages, Swedish speakers often use their native language when visiting or living in Norway or Denmark.
+
+
+=== Religion ===
+
+Before the 11th century, Swedes adhered to Norse paganism, worshiping Æsir gods, with its centre at the Temple in Uppsala. With Christianisation in the 11th century, the laws of the country changed, forbidding worship of other deities until the late 19th century. After the Protestant Reformation in the 1530s, a change led by Martin Luther's Swedish associate Olaus Petri, the authority of the Roman Catholic Church was abolished and Lutheranism became widespread. Adoption of Lutheranism was completed by the Uppsala Synod of 1593, and it became the official religion.
During the era following the Reformation, usually known as the period of Lutheran orthodoxy, small groups of non-Lutherans, especially Calvinist Dutchmen, the Moravian Church and French Huguenots played a significant role in trade and industry, and were quietly tolerated as long as they kept a low religious profile. The Sami originally had their own shamanistic religion, but they were converted to Lutheranism by Swedish missionaries in the 17th and 18th centuries.
+
+With religious liberalisations in the late 18th century believers of other faiths, including Judaism and Roman Catholicism, were allowed to live and work freely in the country. However, until 1860 it remained illegal for Lutherans to convert to another religion. The 19th century saw the arrival of various evangelical free churches, and, towards the end of the century, secularism, leading many to distance themselves from church rituals. Leaving the Church of Sweden became legal with the so-called Dissenter Act of 1860, but only under the provision of entering another Christian denomination. The right to stand outside any religious denomination was formally established in the law on freedom of religion in 1951.
+In 2000, the Church of Sweden was disestablished. Sweden was the second Nordic country to disestablish its state church (after Finland did so in the Church Act of 1869). At the end of 2022, 52.8% of Swedes belonged to the Church of Sweden; this number has been decreasing by 1-2 percentage points each year since 2001. Approximately 2% of the church's members regularly attend Sunday services. The reason for the large number of inactive members is partly that, until 1996, children automatically became members at birth if at least one of the parents was a member. Since 1996, only children and adults who are christened become members. Some 275,000 Swedes are today members of various Evangelical Protestant free churches (where congregation attendance is much higher), and due to recent immigration, there are now some 100,000 Eastern Orthodox Christians and 92,000 Roman Catholics living in Sweden. The first Muslim congregation was established in 1949, when a small contingent of Tatars migrated from Finland. Islam's presence in Sweden remained marginal until the 1960s, when Sweden started to receive migrants from the Balkans and Turkey. Further immigration from North Africa and the Middle East has brought the estimated Muslim population to 600,000. However, only about 110,000 were members of a congregation around 2010. According to the Eurobarometer Poll 2010,
+18% of Swedish citizens responded that "they believe there is a god".
+45% answered that "they believe there is some sort of spirit or life force".
+34% answered that "they do not believe there is any sort of spirit, god, or life force". According to a Demoskop study in 2015, the beliefs of the Swedes showed that
+
+21% believed in a god (down from 35 percent in 2008).
+16% believed in ghosts.
+14% believed in creationism or intelligent design. Sociology professor Phil Zuckerman claims that Swedes, despite a lack of belief in God, commonly question the term atheist, preferring to call themselves Christians while being content with remaining in the Church of Sweden. Religion continues to play a role in Swedish cultural identity. This is evidenced by the fact that the majority of Swedish adults continue to remain members of the Lutheran Church despite having to pay a church tax; moreover, rates of baptism remain high and church weddings are increasing in Sweden.
+ + +=== Health === + +Healthcare in Sweden is mainly tax-funded, universal for all citizens, and decentralised, although private health care also exists. The health care system in Sweden is financed primarily through taxes levied by regional councils and municipalities. A total of 21 councils are in charge of primary and hospital care within the country. +Private healthcare is a rarity in Sweden, and even those private institutions work under the mandated city councils. The city councils regulates the rules and the establishment of potential private practices. While care for the elderly or those who need psychiatric help is conducted privately in many other countries, in Sweden, publicly funded local authorities are in charge of this type of care.Healthcare in Sweden is similar in quality to other developed nations. Sweden ranks in the top five countries with respect to low infant mortality. It also ranks high in life expectancy and in safe drinking water. In 2018, health and medical care represented around 11% of GDP. + + +=== Education === + +Children aged 1–5 years old are guaranteed a place in a public kindergarten (Swedish: förskola or, colloquially, dagis). Between the ages of 6 and 16, children attend compulsory comprehensive school. In the Programme for International Student Assessment (PISA), Swedish 15-year-old pupils score close to the OECD average. After completing the ninth grade, about 90% of the students continue with a three-year upper secondary school (gymnasium), which can lead to both a job qualification or entrance eligibility to university. The school system is largely financed by taxes. +The Swedish government treats public and independent schools equally by introducing education vouchers in 1992 as one of the first countries in the world after the Netherlands. Anyone can establish a for-profit school and the municipality must pay new schools the same amount as municipal schools get. School lunch is free for all students in Sweden, and providing breakfast is also encouraged.There are a number of different universities and colleges in Sweden, the oldest and largest of which are situated in Uppsala, Lund, Gothenburg and Stockholm. In 2000, 32% of Swedish people held a tertiary degree, making the country fifth in the OECD in that category. Along with several other European countries, the government also subsidises tuition of international students pursuing a degree at Swedish institutions, although a recent bill passed in the Riksdag will limit this subsidy to students from EEA countries and Switzerland.The large influx of immigrants to Swedish schools has been cited as a significant part of the reason why Sweden has dropped more than any other European country in the international PISA rankings. + + +== Culture == + +Sweden has many authors of worldwide recognition including August Strindberg, Astrid Lindgren, and Nobel Prize winners Selma Lagerlöf and Harry Martinson. In total seven Nobel Prizes in Literature have been awarded to Swedes. The nation's most well-known artists are painters such as Carl Larsson and Anders Zorn, and the sculptors Tobias Sergel and Carl Milles. +Swedish 20th-century culture is noted by pioneering works in the early days of cinema, with Mauritz Stiller and Victor Sjöström. In the 1920s–1980s, the filmmaker Ingmar Bergman and actors Greta Garbo and Ingrid Bergman became internationally noted people within cinema. More recently, the films of Lukas Moodysson, Lasse Hallström, and Ruben Östlund have received international recognition. 
+
+Throughout the 1960s and 1970s, Sweden was seen as an international leader in what is now referred to as the "sexual revolution", with gender equality having particularly been promoted. The early Swedish film I Am Curious (Yellow) (1967) reflected a liberal view of sexuality, including scenes of love making that caught international attention, and introduced the concept of the "Swedish sin" that had been introduced earlier in the US with Ingmar Bergman's Summer with Monika.
+The image of "hot love and cold people" emerged. Sexual liberalism was seen as part of a modernisation process that by breaking down traditional borders would lead to the emancipation of natural forces and desires. Sweden has also become very liberal towards homosexuality, as is reflected in the popular acceptance of films such as Show Me Love, which is about two young lesbians in the small Swedish town of Åmål. Since 1 May 2009, Sweden repealed its "registered partnership" laws and fully replaced them with gender-neutral marriage. Sweden also offers domestic partnerships for both same-sex and opposite-sex couples. Cohabitation (sammanboende) by couples of all ages, including teenagers as well as elderly couples, is widespread. As of 2009, Sweden is experiencing a baby boom.
+
+
+=== Music ===
+
+Historical re-creations of Norse music have been attempted based on instruments found in Viking sites. The instruments used were the lur (a sort of trumpet), simple string instruments, wooden flutes and drums. Sweden has a significant folk-music scene. The joik, a type of Sami music, is a chant that is part of the traditional Sami animistic spirituality. Notable composers include Carl Michael Bellman and Franz Berwald.
+Sweden also has a prominent choral music tradition. Out of a population of 9.5 million, it is estimated that five to six hundred thousand people sing in choirs. In 2007, with over 800 million dollars in revenue, Sweden was the third-largest music exporter in the world, surpassed only by the US and the UK. According to one source in 2013, Sweden produces the most chart hits per capita in the world, followed by the UK and the USA.
+Sweden has a rather lively jazz scene. During the last sixty years or so it has attained a remarkably high artistic standard, stimulated by domestic as well as external influences and experiences. The Centre for Swedish Folk Music and Jazz Research has published an overview of jazz in Sweden by Lars Westin.
+ +Around 1520 Sweden was out of the Middle Ages and united under King Gustav Vasa, who immediately initiated grand mansions, castles and fortresses to be built. Some of the more magnificent include Kalmar Castle, Gripsholm Castle and the one at Vadstena. +In the next two centuries, Sweden was designated by Baroque architecture and later the rococo. Notable projects from that time include the city Karlskrona, which has now also been declared a World Heritage Site and the Drottningholm Palace. +1930 was the year of the great Stockholm exhibition, which marked the breakthrough of Functionalism, or funkis as it became known. The style came to dominate in the following decades. Some notable projects of this kind were the Million Programme, offering affordable living in large apartment complexes. +The Ericsson Globe, located in Stockholm, is the largest hemispherical building on Earth. Its dome has a diameter of 110 metres (360 feet) and took two and a half years to build. + + +=== Media === + +Swedes are among the greatest consumers of newspapers in the world, and nearly every town is served by a local paper. The country's main quality morning papers are Dagens Nyheter (liberal), Göteborgs-Posten (liberal), Svenska Dagbladet (liberal conservative) and Sydsvenska Dagbladet (liberal). The two largest evening tabloids are Aftonbladet (social democratic) and Expressen (liberal). The ad-financed, free international morning paper, Metro International, was founded in Stockholm, Sweden. The country's news is reported in English by, among others, The Local (liberal).The public broadcasting companies held a monopoly on radio and television for a long time in Sweden. Licence-funded radio broadcasts started in 1925. A second radio network was started in 1954, and a third opened 1962, in response to pirate radio stations. Non-profit community radio was allowed in 1979 and in 1993 commercial local radio started. +The licence-funded television service was officially launched in 1956. A second channel, TV2, was launched in 1969. These two channels (operated by Sveriges Television since the late 1970s) held a monopoly until the 1980s when cable and satellite television became available. The first Swedish-language satellite service was TV3 which started broadcasting from London in 1987. It was followed by Kanal 5 in 1989 (then known as Nordic Channel) and TV4 in 1990. +In 1991 the government announced it would begin taking applications from private television companies wishing to broadcast on the terrestrial network. TV4, which had previously been broadcasting via satellite, was granted a permit and began its terrestrial broadcasts in 1992, becoming the first private channel to broadcast television content from within the country. +Around half the population are connected to cable television. Digital terrestrial television in Sweden started in 1999 and the last analogue terrestrial broadcasts were terminated in 2007. + + +=== Literature === + +The first literary text from Sweden is the Rök runestone, carved during the Viking Age c. 800 AD. With the conversion of the land to Christianity around 1100 AD, Sweden entered the Middle Ages, during which monastic writers preferred to use Latin. Therefore, there are only a few texts in the Old Swedish from that period. Swedish literature only began to flourish when the language was standardised during the 16th century. This standardisation was largely due to the full translation of the Bible into Swedish in 1541. This translation is the so-called Gustav Vasa Bible. 
+With improved education and the freedom brought by secularisation, the 17th century saw several notable authors develop the Swedish language further. Some key figures include Georg Stiernhielm (17th century), who was the first to write classical poetry in Swedish; Johan Henric Kellgren (18th century), the first to write fluent Swedish prose; Carl Michael Bellman (late 18th century), the first writer of burlesque ballads; and August Strindberg (late 19th century), a socio-realistic writer and playwright who won worldwide fame. The early 20th century continued to produce notable authors, such as Selma Lagerlöf, (Nobel laureate 1909), Verner von Heidenstam (Nobel laureate 1916) and Pär Lagerkvist (Nobel laureate 1951). +In recent decades, a handful of Swedish writers have established themselves internationally, including the detective novelist Henning Mankell and the writer of spy fiction Jan Guillou. The Swedish writer to have made the most lasting impression on world literature is the children's book writer Astrid Lindgren, and her books about Pippi Longstocking, Emil, and others. In 2008, the second best-selling fiction author in the world was Stieg Larsson, whose Millennium series of crime novels is being published posthumously to critical acclaim. Larsson drew heavily on the work of Lindgren by basing his central character, Lisbeth Salander, on Longstocking. + + +=== Holidays === + +Apart from traditional Protestant Christian holidays, Sweden also celebrates some unique holidays, some of a pre-Christian tradition. They include Midsummer celebrating the summer solstice; Walpurgis Night (Valborgsmässoafton) on 30 April lighting bonfires; and Labour Day or May Day on 1 May is dedicated to socialist demonstrations. The day of giver-of-light Saint Lucia, 13 December, is widely acknowledged in elaborate celebrations which betoken its Italian origin and commence the month-long Christmas season. +6 June is the National Day of Sweden and has since 2005 been a public holiday. Furthermore, there are official flag flying day observances and a Namesdays in Sweden calendar. In August many Swedes have kräftskivor (crayfish dinner parties). Martin of Tours Eve is celebrated in Scania in November with Mårten Gås parties, where roast goose and svartsoppa ('black soup', made of goose stock, fruit, spices, spirits and goose blood) are served. The Sami, one of Sweden's indigenous minorities, have their holiday on 6 February and Scania celebrate their Scanian Flag day on the third Sunday in July. + + +=== Cuisine === + +Swedish cuisine, like that of the other Nordic countries (Denmark, Norway and Finland), was traditionally simple. Fish (particularly herring), meat, potatoes and dairy products played prominent roles. Spices were sparse. Preparations include Swedish meatballs, traditionally served with gravy, boiled potatoes and lingonberry jam; pancakes; pyttipanna, a spiced fried hash of meat and potatoes originally meant to use up any leftovers of meat; lutfisk; and the smörgåsbord, or lavish buffet. Akvavit is a popular alcoholic distilled beverage, and the drinking of snaps is of cultural importance. The traditional flat and dry crisp bread has developed into several contemporary variants. Regionally important foods are the surströmming (a fermented fish) in northern Sweden and eel in southern Sweden. 
+Swedish traditional dishes, some of which are many hundreds of years old, are still an important part of Swedish everyday meals, in spite of the fact that modern-day Swedish cuisine adopts many international dishes. +In August, at the traditional feast known as crayfish party, kräftskiva, Swedes eat large amounts of crayfish boiled with dill. + + +=== Cinema === + +Swedes have been fairly prominent in the film area through the years. A number of Swedish people have found success in Hollywood, including Ingrid Bergman, Greta Garbo and Max von Sydow. Amongst several directors who have made internationally successful films can be mentioned Ingmar Bergman, Lukas Moodysson and Lasse Hallström. + + +=== Fashion === +Interest in fashion is big in Sweden and the country headquarters famous brands like Hennes & Mauritz (operating as H&M), J. Lindeberg (operating as JL), Acne, Lindex, Odd Molly, Cheap Monday, Gant, WESC, Filippa K, and Nakkna within its borders. These companies, however, are composed largely of buyers who import fashionable goods from throughout Europe and America, continuing the trend of Swedish business toward multinational economic dependency like many of its neighbours. + + +=== Sports === + +Sport activities are a national movement with half of the population actively participating in organised sporting activities. The two main spectator sports are football and ice hockey. Second to football, horse sports (of which most of the participants are women) have the highest number of practitioners. Thereafter, golf, orienteering, gymnastics, track and field, and the team sports of ice hockey, handball, floorball, basketball and bandy are the most popular in terms of practitioners. +The Swedish national men's ice hockey team, affectionately known as Tre Kronor (English: Three Crowns; the national symbol of Sweden), is regarded as one of the best in the world. The team has won the World Championships nine times, placing them third in the all-time medal count. Tre Kronor also won Olympic gold medals in 1994 and 2006. In 2006, Tre Kronor became the first national hockey team to win both the Olympic and world championships in the same year. The Swedish national men's football team has seen some success at the World Cup in the past, finishing second when they hosted the tournament in 1958, and third twice, in 1950 and 1994. +Sweden hosted the 1912 Summer Olympics, Equestrian at the 1956 Summer Olympics and the FIFA World Cup in 1958. Other big sports events include the UEFA Euro 1992, 1995 FIFA Women's World Cup, 1995 World Championships in Athletics, UEFA Women's Euro 2013, and several championships of ice hockey, curling, athletics, skiing, bandy, figure skating and swimming. +In 2016, the Swedish Poker Federation (Svepof) joined The International Federation of Poker (IFP). + + +== See also == +List of Sweden-related topics +Outline of Sweden +329 Svea + + +== Notes == + + +== References == + + +== Further reading == + + +== External links == + +Sweden. The World Factbook. Central Intelligence Agency. +Sweden entry at Britannica.com +Gosse, Edmund William (1887). "Sweden" . Encyclopædia Britannica. Vol. XXII (9th ed.). pp. 736–758. +Gosse, Edmund William; and four others (1911). "Sweden" . Encyclopædia Britannica. Vol. 26 (11th ed.). pp. 188–221. +Hildebrand, Karl (1922). "Sweden" . Encyclopædia Britannica. Vol. 32 (12th ed.). 
+Sweden from UCB Libraries GovPubs
+Sweden at Curlie
+Sweden profile from the BBC News
+ Wikimedia Atlas of Sweden
+ Geographic data related to Sweden at OpenStreetMap
+Key Development Forecasts for Sweden from International Futures
+Study in Sweden – official guide to studying in Sweden
+Wayback Machine Technological Waves and Economic Growth in Sweden 1850–2005
+Sweden – Economic Growth and Structural Change, 1800–2000 — EH.Net Encyclopedia
+vifanord – a digital library that provides scientific information on the Nordic and Baltic countries as well as the Baltic region as a whole
+Public sector
+
+Sweden.se — Sweden's official portal
+The Swedish Parliament – official website
+The Government of Sweden – official website
+The Royal Court Archived 11 October 2016 at the Wayback Machine – official website of the Swedish Monarchy
+News media
+
+Radio Sweden – public service
+Sveriges Television (in Swedish) – public service
+Dagens Nyheter (in Swedish)
+Svenska Dagbladet (in Swedish)
+The Local – Sweden's news in English – independent English language news site
+Trade
+
+World Bank Summary Trade Statistics Sweden
+Travel
+
+VisitSweden.com – official travel and tourism website for Sweden \ No newline at end of file diff --git a/notebooks/toy_data/Titanic_film.txt b/notebooks/toy_data/Titanic_film.txt new file mode 100644 index 000000000..2ebd96f4b --- /dev/null +++ b/notebooks/toy_data/Titanic_film.txt @@ -0,0 +1,203 @@ +Titanic is a 1997 American epic romance and disaster film directed, written, produced, and co-edited by James Cameron. Incorporating both historical and fictionalized aspects, it is based on accounts of the sinking of RMS Titanic in 1912. Kate Winslet and Leonardo DiCaprio star as members of different social classes who fall in love during the ship's maiden voyage. The film also features Billy Zane, Kathy Bates, Frances Fisher, Gloria Stuart, Bernard Hill, Jonathan Hyde, Victor Garber, and Bill Paxton.
+Cameron's inspiration for the film came from his fascination with shipwrecks; he felt a love story interspersed with the human loss would be essential to convey the emotional impact of the disaster. Production began on September 1, 1995, when Cameron shot footage of the actual Titanic wreck. The modern scenes on the research vessel were shot on board the Akademik Mstislav Keldysh, which Cameron had used as a base when filming the wreck. Scale models, computer-generated imagery, and a reconstruction of the Titanic built at Baja Studios were used to re-create the sinking. The film was co-financed by Paramount Pictures and 20th Century Fox; the former handled distribution in North America while the latter released the film internationally. It was the most expensive film ever made at the time, with a production budget of $200 million. Filming took place from July 1996 to March 1997.
+Titanic was released on December 19, 1997. It was praised for its visual effects, performances (particularly those of DiCaprio, Winslet, and Stuart), production values, direction, score, cinematography, story, and emotional depth. Among other awards, it was nominated for 14 Academy Awards and won 11, including Best Picture and Best Director, tying Ben-Hur (1959) for the most Academy Awards won by a film. With an initial worldwide gross of over $1.84 billion, Titanic was the first film to reach the billion-dollar mark. It was the highest-grossing film of all time until Cameron's next film, Avatar (2009), surpassed it in 2010.
A number of re-releases have pushed the film's worldwide total to $2.257 billion, making it the second film to gross more than $2 billion worldwide after Avatar. It was selected for preservation in the United States National Film Registry in 2017. + + +== Plot == +In 1996, aboard the research vessel Akademik Mstislav Keldysh, Brock Lovett and his team search the wreck of RMS Titanic. They recover a safe they hope contains a necklace with a large diamond known as the Heart of the Ocean. Instead, they find only a drawing of a young nude woman wearing the necklace. The sketch is dated April 14, 1912, the same day the Titanic struck the iceberg that caused it to sink. After viewing a television news story about the discovery, centenarian Rose Dawson Calvert contacts Lovett, identifying herself as the woman in the drawing. Hoping she can help locate the necklace, Lovett brings Rose aboard Keldysh, where she recounts her experiences as a Titanic passenger. +In 1912 Southampton, 17-year-old Rose DeWitt Bukater, her wealthy fiancé Caledon "Cal" Hockley, and Rose's widowed mother Ruth, board the Titanic. Ruth emphasizes that Rose's marrying Cal will resolve the family's financial problems and maintain their upper-class status. Meanwhile, Jack Dawson, a poor young artist, wins a third-class Titanic ticket in a poker game. After setting sail, Rose, distraught over her loveless engagement, climbs over the stern railing, intending to commit suicide. Jack coaxes her back onto the deck and they develop a friendship. Jack soon admits that he has feelings for Rose. When Cal and Ruth strongly object, Rose rejects Jack's attentions, but returns to him after realizing she has fallen in love. +Rose brings Jack to her state room and requests he draw her nude, wearing only the Heart of the Ocean. They later evade Cal's servant, Lovejoy, and have sex in a Renault Towncar inside the cargo hold. On the forward deck, they witness the ship's collision with an iceberg and overhear its officers discussing its seriousness. Cal discovers Jack's sketch and an insulting note from Rose in his safe, along with the necklace. When Jack and Rose return to warn the others about the collision, Cal has Lovejoy slip the necklace into Jack's pocket to frame him for theft. Jack is confined in the master-at-arms' office. Cal puts the necklace into his own overcoat pocket. +With the ship sinking, the crew prioritize women and children for evacuation. Rose finds and frees Jack, and they make it back to the deck, where Cal and Jack urge Rose to board a lifeboat. Intending to save himself, Cal lies that he will get Jack safely off the ship and wraps his overcoat around Rose. As her lifeboat is lowered, Rose, unable to abandon Jack, jumps back onto the ship. Cal grabs Lovejoy's pistol and chases Jack and Rose, but they escape; Cal realizes the necklace is still in the coat he gave Rose. He later poses as a lost child's father to get onboard a lifeboat. +Jack and Rose return to the deck. The ship's stern is rising as the flooded bow sinks; the two desperately cling to the stern rail. The upended ship breaks in half and the bow section sinks. The remaining stern slams back onto the ocean, then upends again before it, too, sinks. In the freezing water, Jack helps Rose onto a wood transom panel among the debris, buoyant enough only for one person, and makes her promise to survive. Jack dies of cold shock, but Rose is among six people saved by the one returning lifeboat. +RMS Carpathia rescues the survivors. 
Rose avoids Cal and her mother by hiding among the steerage passengers and giving her name as Rose Dawson. Still wearing Cal's overcoat, she discovers the necklace tucked inside the pocket. In the present, Rose says she later heard that Cal committed suicide after losing his fortune in the Wall Street Crash of 1929. Lovett abandons his search after hearing Rose's story. Alone on the stern of Keldysh, Rose takes the Heart of the Ocean, which has been in her possession all along, and drops it into the sea over the wreck site. While she is seemingly asleep in her bed, her photos on the dresser depict a life of freedom and adventure inspired by her early conversations with Jack. A young Rose reunites with Jack at the Titanic's Grand Staircase, applauded by those who died that night. + + +== Cast == + + +=== Fictional characters === + +Kate Winslet as Rose DeWitt Bukater: Cameron said Winslet "had the thing that you look for" and that there was "a quality in her face, in her eyes," that he "just knew people would be ready to go the distance with her". Rose is a 17-year-old girl, originally from Philadelphia, who is forced into an engagement to 30-year-old Cal Hockley so she and her mother, Ruth, can maintain their high-class status after her father's death had left the family debt-ridden. Rose boards Titanic with Cal and Ruth, as a first-class passenger, and meets Jack. Winslet said of her character, "She has got a lot to give, and she's got a very open heart. And she wants to explore and adventure the world, but she [feels] that's not going to happen." Gwyneth Paltrow, Winona Ryder, Claire Danes, Gabrielle Anwar, and Reese Witherspoon had been considered for the role. When they turned it down, Winslet campaigned heavily for the role. She sent Cameron daily notes from England, which led Cameron to invite her to Hollywood for auditions. As with DiCaprio, casting director Mali Finn originally brought her to Cameron's attention. When looking for a Rose, Cameron described the character as "an Audrey Hepburn type" and was initially uncertain about casting Winslet even after her screen test impressed him. After she screen tested with DiCaprio, Winslet was so thoroughly impressed with him that she whispered to Cameron, "He's great. Even if you don't pick me, pick him." Winslet sent Cameron a single rose with a card signed, "From Your Rose", and lobbied him by phone. "You don't understand!" she pleaded one day when she reached him by mobile phone in his Humvee. "I am Rose! I don't know why you're even seeing anyone else!" Her persistence, as well as her talent, eventually convinced him to cast her in the role. +Leonardo DiCaprio as Jack Dawson: Cameron said he needed the cast to feel they were really on the Titanic, to relive its liveliness, and "to take that energy and give it to Jack, ... an artist who is able to have his heart soar". Jack is portrayed as an itinerant, poor orphan from Chippewa Falls, Wisconsin, who has travelled the world, including Paris. He wins two third-class tickets for the Titanic in a poker game and travels with his friend Fabrizio. He is attracted to Rose at first sight. Her fiancé's invitation to dine with them the next evening enables Jack to mix with the first-class passengers for a night. Cameron's original choice for the role was River Phoenix; however, Phoenix died in 1993. Though established actors like Matthew McConaughey, Chris O'Donnell, Billy Crudup, and Stephen Dorff were considered, Cameron felt they were too old for the part of a 20-year-old.
Tom Cruise was interested, but his asking price was too high. Cameron considered Jared Leto for the role, but Leto refused to audition. Jeremy Sisto did a series of screen tests with Winslet and three other actresses vying for the role of Rose. DiCaprio, 21 years old at the time, was brought to Cameron's attention by casting director Mali Finn. Initially, he did not want the role and refused to read his first romantic scene (see below). Cameron said, "He read it once, then started goofing around, and I could never get him to focus on it again. But for one split second, a shaft of light came down from the heavens and lit up the forest." Cameron strongly believed in DiCaprio's acting ability and told him, "Look, I'm not going to make this guy brooding and neurotic. I'm not going to give him a tic and a limp and all the things you want." Cameron envisioned the character as being like those played by James Stewart or Gregory Peck. Although Jack Dawson was a fictional character, in Fairview Cemetery in Halifax, Nova Scotia, where 121 victims are buried, there is a grave labeled "J. Dawson". The real J. Dawson was Joseph Dawson, a trimmer in the engine room. "It wasn't until after the movie came out that we found out that there was a J. Dawson gravestone," said the film's producer, Jon Landau, in an interview. +Billy Zane as Caledon Hockley: Caledon is Rose's arrogant and snobbish 30-year-old fiancé, who is the heir to a Pittsburgh steel fortune. He becomes increasingly embarrassed by, jealous of, and cruel about Rose's affection for Jack. The part was originally offered to Matthew McConaughey, and Rob Lowe has also gone on the record as having pursued it. +Frances Fisher as Ruth DeWitt Bukater: Rose's widowed mother, who arranges her daughter's engagement to Cal to maintain her family's high-society status. Like many aristocratic passengers portrayed in the film, her disposition is elitist and frivolous. She loves her daughter but believes that social position is more important than having a loving marriage. She strongly dislikes Jack, even though he saved her daughter's life. +Gloria Stuart as Rose Dawson Calvert: Rose narrates the film in a modern-day framing device. Cameron stated, "In order to see the present and the past, I decided to create a fictional survivor who is [close to] 101 years, and she connects us in a way through history." The 100-year-old Rose gives Lovett information regarding the "Heart of the Ocean" after he discovers a nude drawing of her in the wreck. She shares the story of her time aboard the ship, and speaks about her relationship with Jack for the first time since the sinking. At 87, Stuart had to be made up to look older for the role. Of casting Stuart, Cameron stated, "My casting director found her. She was sent out on a mission to find retired actresses from the Golden Age of the thirties and forties." Cameron said that he did not know who Stuart was, and Fay Wray was also considered for the role. "But [Stuart] was just so into it, and so lucid, and had such a great spirit. And I saw the connection between her spirit and [Winslet's] spirit," stated Cameron. "I saw this joie de vivre in both of them, that I thought the audience would be able to make that cognitive leap that it's the same person." +Bill Paxton as Brock Lovett: A treasure hunter looking for the "Heart of the Ocean" in the wreck of the Titanic in the present. Time and funding for his expedition are running out. 
He later reflects at the film's conclusion that, despite thinking about Titanic for three years, he has never understood it until he hears Rose's story. +Suzy Amis as Lizzy Calvert: Rose's granddaughter, who accompanies her when she visits Lovett on the ship and learns of her grandmother's romantic past with Jack Dawson. +Danny Nucci as Fabrizio: Jack's Italian best friend, who boards Titanic with him after Jack wins two tickets in a poker game. Fabrizio fails to board a lifeboat when the Titanic sinks and is killed when one of the ship's funnels breaks and crashes into the water, crushing him and several other passengers to death. +David Warner as Spicer Lovejoy: An ex-Pinkerton constable, Lovejoy is Cal's English valet and bodyguard, who keeps an eye on Rose and is suspicious about the circumstances surrounding Jack rescuing her. He dies when the Titanic splits in half, causing him to fall into a massive opening. Warner had appeared in the 1979 TV miniseries S.O.S. Titanic. +Jason Barry as Tommy Ryan: An Irish third-class passenger who befriends Jack and Fabrizio. Tommy is killed when he is accidentally pushed forward and shot by a panicked First Officer Murdoch. + + +=== Historical characters === +Although not intended to be an entirely accurate depiction of events, the film includes portrayals of several historical figures: + +Kathy Bates as Margaret "Molly" Brown: Brown is looked down upon by other first-class women, including Ruth, as "vulgar" and "new money". She is friendly to Jack and lends him a suit of evening clothes (bought for her son) when he is invited to dinner in the first-class dining saloon. She was dubbed "The Unsinkable Molly Brown" by historians because, with the support of other women, she commandeered Lifeboat 6 from Quartermaster Robert Hichens. Some aspects of this altercation are portrayed in Cameron's film. Reba McEntire was offered the role, but had to turn it down, because it conflicted with her touring schedule. +Victor Garber as Thomas Andrews: The ship's builder, Andrews is portrayed as a kind, decent man who is modest about his grand achievement. After the collision, he tries to convince the others, particularly Ismay, that it is a "mathematical certainty" that the ship will sink. He is depicted during the sinking of the ship as standing next to the clock in the first-class smoking room, lamenting his failure to build a strong and safe ship. Although this has become one of the most famous legends of the sinking of the Titanic, this story, which was published in a 1912 book (Thomas Andrews: Shipbuilder) and therefore perpetuated, came from John Stewart, a steward on the ship who in fact left the ship in boat no.15 at approximately 1:40 a.m. There were testimonies of sightings of Andrews after that moment. It appears that Andrews stayed in the smoking room for some time to gather his thoughts, then he continued assisting with the evacuation. +Bernard Hill as Captain Edward John Smith: Smith planned to make the Titanic his final voyage before retiring. He retreats into the wheelhouse on the bridge as the ship sinks, dying when the windows burst from the water pressure whilst he clings to the ship's wheel. There are conflicting accounts as to whether he died in this manner or later froze to death in the water near the capsized collapsible lifeboat "B". +Jonathan Hyde as J. 
Bruce Ismay: White Star Line's ignorant, boorish managing director, who influences Captain Smith to go faster with the prospect of an earlier arrival in New York and favorable press attention; while this action appears in popular portrayals of the disaster, it is unsupported by evidence. After the collision, he struggles to comprehend that his "unsinkable" ship is doomed. Ismay later boards Collapsible C (one of the last lifeboats to leave the ship) just before it is lowered. He was branded a coward by the press and public for surviving the disaster while many women and children had drowned. +Eric Braeden as John Jacob Astor IV: A first-class passenger whom Rose (correctly) calls the richest man on the ship. The film depicts Astor and his 18-year-old wife Madeleine (Charlotte Chatton) as being introduced to Jack by Rose in the first-class dining saloon. During the introduction, Astor asks if Jack is connected to the "Boston Dawsons", a question Jack deflects by saying that he is instead affiliated with the Chippewa Falls Dawsons. Astor is last seen as the Grand Staircase glass dome implodes and water surges in. +Bernard Fox as Colonel Archibald Gracie IV: The film depicts Gracie making a comment to Cal that "women and machinery don't mix", and congratulating Jack for saving Rose from falling off the ship, though he is unaware that it was a suicide attempt. He is later seen offering to lead Jack and Rose to the remaining lifeboats during the sinking. Fox had portrayed Frederick Fleet in the 1958 film A Night to Remember. +Michael Ensign as Benjamin Guggenheim: A mining magnate traveling in first-class. He shows off his French mistress Madame Aubert (Fannie Brett) to his fellow passengers while his wife and three daughters wait for him at home. When Jack joins the other first-class passengers for dinner after his rescue of Rose, Guggenheim refers to him as a "bohemian". He is seen in the flooding Grand Staircase during the sinking, saying he is prepared to go down as a gentleman. +Jonathan Evans-Jones as Wallace Hartley: The ship's bandmaster and violinist who plays uplifting music with his colleagues on the boat deck as the ship sinks. As the final plunge begins, he leads the band in a final performance of "Nearer, My God, to Thee", to the tune of Bethany, and dies in the sinking. +Mark Lindsay Chapman as Chief Officer Henry Wilde: The ship's chief officer, who lets Cal on board a lifeboat because he has a child in his arms. Before he dies, he tries to get the boats to return to the sinking site to rescue passengers by blowing his whistle. After he freezes to death, Rose uses his whistle to attract the attention of Fifth Officer Lowe, which leads to her rescue. +Ewan Stewart as First Officer William Murdoch: The officer who is put in charge of the bridge on the night the ship struck the iceberg. During a rush for the lifeboats, Murdoch shoots Tommy Ryan, as well as another passenger, in a momentary panic, then commits suicide by shooting himself in the head. When Murdoch's nephew Scott saw the film, he objected to his uncle's portrayal as damaging to Murdoch's heroic reputation. A few months later, Fox vice-president Scott Neeson went to Dalbeattie, Scotland, where Murdoch lived, to deliver a personal apology, and also presented a £5000 donation to Dalbeattie High School to boost the school's William Murdoch Memorial Prize. Cameron apologized on the DVD commentary, but stated that there were officers who fired gunshots to enforce the "women and children first" policy. 
According to Cameron, his depiction of Murdoch is that of an "honorable man," not of a man "gone bad" or of a "cowardly murderer." He added, "I'm not sure you'd find that same sense of responsibility and total devotion to duty today. This guy had half of his lifeboats launched before his counterpart on the port side had even launched one. That says something about character and heroism." +Jonathan Phillips as Second Officer Charles Lightoller: Lightoller took charge of the port side evacuation. The film depicts Lightoller informing Captain Smith that it will be difficult to see icebergs without breaking water, and following the collision, suggesting the crew should begin boarding women and children to the lifeboats. He is seen brandishing a gun and threatening to use it to keep order. He can be seen on top of Collapsible B when the first funnel collapses. Lightoller was the most senior officer to have survived the disaster. +Simon Crane as Fourth Officer Joseph Boxhall: The officer in charge of firing flares and manning Lifeboat 2 during the sinking. He is shown on the bridge wings helping the seamen firing the flares. +Ioan Gruffudd as Fifth Officer Harold Lowe: The ship's only officer to lead a lifeboat to retrieve survivors of the sinking from the icy waters. The film depicts Lowe rescuing Rose. +Edward Fletcher as Sixth Officer James Moody: The ship's only junior officer to have died in the sinking. The film depicts Moody admitting Jack and Fabrizio onto the ship only moments before it departs from Southampton. Moody is later shown following Murdoch's orders to put the ship to full speed ahead, and informs Murdoch about the iceberg. He is last seen clinging to one of the davits on the starboard side after having unsuccessfully attempted to launch collapsible A. +James Lancaster as Father Thomas Byles: Second-class passenger Father Byles, a Catholic priest from England, is portrayed praying and consoling passengers during the ship's final moments. +Lew Palter and Elsa Raven as Isidor and Ida Straus: Isidor is a former owner of R.H. Macy and Company, a former congressman from New York, and a member of the New York and New Jersey Bridge Commission. During the sinking, the couple were offered seats on a lifeboat together. Isidor refused to go before all women and children had been evacuated, and urged his wife Ida to go ahead. Ida is portrayed refusing to board the lifeboat, saying that she will honor her wedding pledge by staying with Isidor. They are last seen lying on their bed, embracing each other as water fills their stateroom. +Martin Jarvis as Sir Cosmo Duff-Gordon: A Scottish baronet who is rescued in Lifeboat 1. Lifeboats 1 and 2 were emergency boats with a capacity of 40. Situated at the forward end of the boat deck, these were kept ready to launch in case of a person falling overboard. On the night of the disaster, Lifeboat 1 was the fourth to be launched, with 12 people aboard, including Duff-Gordon, his wife and her secretary. The baronet was much criticized for his conduct during the incident. It was suggested that he had boarded the emergency boat in violation of the "women and children first" policy, and that the boat had failed to return to rescue those struggling in the water. He offered five pounds to each of the lifeboat's crew, which those critical of his conduct viewed as a bribe.
The Duff-Gordons at the time (and his wife's secretary in a letter written at the time and rediscovered in 2007) stated that there had been no women or children waiting to board in the vicinity of the launching of their boat; there is confirmation that lifeboat 1 of the Titanic was almost empty, and that First Officer William Murdoch was apparently glad to offer Duff-Gordon and his wife and her secretary a place (simply to fill it) after they had asked if they could get on. Duff-Gordon denied that his offer of money to the lifeboat crew represented a bribe. The British Board of Trade's inquiry into the disaster accepted Duff-Gordon's denial of bribing the crew, but maintained that, if the emergency boat had rowed towards the people who were in the water, it might very well have been able to rescue some of them. +Rosalind Ayres as Lady Duff-Gordon: A world-famous fashion designer and Sir Cosmo's wife. She is rescued in Lifeboat 1 with her husband. They never lived down rumors that they had forbidden the lifeboat's crew to return to the wreck site in case they would be swamped. +Rochelle Rose as Noël Leslie, Countess of Rothes: The Countess is shown to be friendly with Cal and the DeWitt Bukaters. Despite being of a higher status in society than Sir Cosmo and Lady Duff-Gordon, she is kind, and helps row the boat and even looks after the steerage passengers. +Scott G. Anderson as Frederick Fleet: The lookout who saw the iceberg. Fleet escapes the sinking ship aboard Lifeboat 6. +Paul Brightwell as Quartermaster Robert Hichens: One of the ship's six quartermasters and at the ship's wheel at the time of collision. He is in charge of lifeboat 6. He refuses to go back and pick up survivors after the sinking and eventually the boat is commandeered by Molly Brown. +Martin East as Reginald Lee: The other lookout in the crow's nest. He survives the sinking. +Gregory Cooke as Jack Phillips: Senior wireless operator on board the Titanic whom Captain Smith ordered to send the distress signal. +Craig Kelly as Harold Bride: Junior wireless operator on board the Titanic. +Liam Tuohy as Chief Baker Charles Joughin: The baker appears in the film helping Rose stand up after she falls, following her and Jack to the ship's stern, and finally hanging onto the ship's railing as it sinks, drinking brandy from a flask. According to the real Joughin's testimony, he rode the ship down and stepped into the water without getting his hair wet. He also admitted to hardly feeling the cold, most likely thanks to alcohol. In a deleted scene, he's shown throwing deckchairs overboard before taking a drink from his bottle. +Terry Forrestal as Chief Engineer Joseph G. Bell: Bell and his men worked until the last minute to keep the lights and the power on in order for distress signals to get out. Bell and all of the engineers died in the bowels of the Titanic. + + +=== Cameos === +Several crew members of the Akademik Mstislav Keldysh appear in the film, including Anatoly Sagalevich, creator and pilot of the Mir self-propelled Deep Submergence Vehicle. Anders Falk, who filmed a documentary about the film's sets for the Titanic Historical Society, makes a cameo appearance in the film as a Swedish immigrant whom Jack Dawson meets when he enters his cabin; Edward Kamuda and Karen Kamuda, then President and Vice President of the Society, who served as film consultants, were cast as extras in the film. 
+ + +== Pre-production == + + +=== Writing and inspiration === + +James Cameron has long had a fascination with shipwrecks, and for him Titanic was "the Mount Everest of shipwrecks". He was almost past the point in his life when he felt he could consider an undersea expedition, but said he still had "a mental restlessness" to live the life he had turned away from when he switched from the sciences to the arts in college. So when an IMAX film was made from footage shot of the wreck itself, he decided to seek Hollywood funding to "pay for an expedition and do the same thing". It was "not because I particularly wanted to make the movie," Cameron said. "I wanted to dive to the shipwreck."Cameron wrote a scriptment for a Titanic film, met with 20th Century Fox executives including Peter Chernin, and pitched it as "Romeo and Juliet on the Titanic". Cameron stated, "They were like, 'Oooooohkaaaaaay – a three-hour romantic epic? Sure, that's just what we want. Is there a little bit of Terminator in that? Any Harrier jets, shoot-outs, or car chases?' I said, 'No, no, no. It's not like that.'" The studio was dubious about the idea's commercial prospects, but, hoping for a long-term relationship with Cameron, they gave him a greenlight.Cameron convinced Fox to promote the film based on the publicity afforded by shooting the Titanic wreck itself, and organized several dives to the site over a period of two years. "My pitch on that had to be a little more detailed," said Cameron. "So I said, 'Look, we've got to do this whole opening where they're exploring the Titanic and they find the diamond, so we're going to have all these shots of the ship." Cameron stated, "Now, we can either do them with elaborate models and motion control shots and CG and all that, which will cost X amount of money – or we can spend X plus 30 per cent and actually go shoot it at the real wreck."The crew shot at the real wreck in the Atlantic Ocean twelve times in 1995. At that depth, with a water pressure of 6,000 pounds per square inch, "one small flaw in the vessel's superstructure would mean instant death for all on board." Not only were the dives high-risk, but adverse conditions prevented Cameron from getting the high-quality footage that he wanted. During one dive, one of the submersibles collided with Titanic's hull, damaging both sub and ship, and leaving fragments of the submersible's propeller shroud scattered around the superstructure. The external bulkhead of Captain Smith's quarters collapsed, exposing the interior. The area around the entrance to the Grand Staircase was also damaged.Descending to the actual site made both Cameron and crew want "to live up to that level of reality ... But there was another level of reaction coming away from the real wreck, which was that it wasn't just a story, it wasn't just a drama," he said. "It was an event that happened to real people who really died. Working around the wreck for so much time, you get such a strong sense of the profound sadness and injustice of it, and the message of it." Cameron stated, "You think, 'There probably aren't going to be many filmmakers who go to Titanic. There may never be another one – maybe a documentarian." Due to this, he felt "a great mantle of responsibility to convey the emotional message of it – to do that part of it right, too".After filming the underwater shots, Cameron began writing the screenplay. He wanted to honor the people who died during the sinking, so he spent six months researching all of the Titanic's crew and passengers. 
"I read everything I could. I created an extremely detailed timeline of the ship's few days and a very detailed timeline of the last night of its life," he said. "And I worked within that to write the script, and I got some historical experts to analyze what I'd written and comment on it, and I adjusted it." He paid meticulous attention to detail, even including a scene depicting the Californian's role in Titanic's demise, though this was later cut. From the beginning of the shoot, they had "a very clear picture" of what happened on the ship that night. "I had a library that filled one whole wall of my writing office with Titanic stuff, because I wanted it to be right, especially if we were going to dive to the ship," he said. "That set the bar higher in a way – it elevated the movie in a sense. We wanted this to be a definitive visualization of this moment in history as if you'd gone back in a time machine and shot it."Cameron was influenced in his crafting of the film by the 1958 British production A Night to Remember, which he had seen as a youth. He liberally copied some dialogue and scenes from that film, including the lively party of the passengers in steerage, and the musicians playing on the deck during the sinking of the ship.Cameron felt the Titanic sinking was "like a great novel that really happened", but that the event had become a mere morality tale; the film would give audiences the experience of living the history. The treasure hunter Brock Lovett represented those who never connected with the human element of the tragedy, while the blossoming romance of Jack and Rose, Cameron believed, would be the most engaging part of the story: when their love is finally destroyed, the audience would mourn the loss. He said: "All my films are love stories, but in Titanic I finally got the balance right. It's not a disaster film. It's a love story with a fastidious overlay of real history."Cameron framed the romance with the elderly Rose to make the intervening years palpable and poignant. While Winslet and Stuart stated their belief that, instead of being asleep in her bed, the character dies at the end of the film, Cameron said that he would rather not reveal what he intended with the ending because "[t]he answer has to be something you supply personally; individually." + + +=== Scale modeling === + +Harland and Wolff, Titanic's builders, opened their private archives to the crew, sharing blueprints that were previously thought lost. For the ship's interiors, production designer Peter Lamont's team looked for artifacts from the era. The newness of the ship meant every prop had to be made from scratch. Fox acquired 40 acres of waterfront south of Playas de Rosarito in Mexico, and began building a new studio on May 31, 1996. A horizon tank of seventeen million gallons was built for the exterior of the reconstructed ship, providing 270 degrees of ocean view. The ship was built to full scale, but Lamont removed redundant sections on the superstructure and forward well deck for the ship to fit in the tank, with the remaining sections filled with digital models. The lifeboats and funnels were shrunken by ten percent. The boat deck and A-deck were working sets, but the rest of the ship was just steel plating. Within was a fifty-foot lifting platform for the ship to tilt during the sinking sequences. The 60 foot 1/8th scale model of the stern section was designed by naval architect Jay Kantola utilizing plans of the Titanic's sister ship RMS Olympic. 
Towering above was a 162-foot-tall (49 m) tower crane on 600 feet (180 m) of rail track, acting as a combined construction, lighting, and camera platform.The sets representing the interior rooms of the Titanic were reproduced exactly as originally built, using photographs and plans from the Titanic's builders. The Grand Staircase, which features prominently in the film, was recreated to a high standard of authenticity, though it was widened 30% compared to the original and reinforced with steel girders. Craftsmen from Mexico and Britain sculpted the ornate paneling and plaster-work based on Titanic's original designs. The carpeting, upholstery, individual pieces of furniture, light fixtures, chairs, cutlery and crockery with the White Star Line crest on each piece were among the objects recreated according to original designs. Cameron additionally hired two Titanic historians, Don Lynch and Ken Marschall, to authenticate the historical detail in the film. + + +== Production == +Principal photography for Titanic began on July 31, 1996 at Dartmouth, Nova Scotia, with the filming of the modern-day expedition scenes aboard the Akademik Mstislav Keldysh. In September 1996, the production moved to the newly built Fox Baja Studios in Rosarito, Mexico, where a full-scale Titanic had been constructed. The poop deck was built on a hinge that could rise from zero to 90 degrees in a few seconds, just as the ship's stern rose during the sinking. For the safety of the stuntmen, many props were made of foam rubber. By November 15, the boarding scenes were being shot. Cameron chose to build his Titanic on the starboard side as a study of weather data revealed it was a prevailing north-to-south wind, which blew the funnel smoke aft. This posed a problem for shooting the ship's departure from Southampton, as it was docked on its port side. Implementation of written directions, as well as props and costumes, had to be reversed; for example, if someone walked to their right in the script, they had to walk left during shooting. In post-production, the film was flipped to the correct direction.A full-time etiquette coach was hired to instruct the cast in the manners of the upper class gentility in 1912. Despite this, several critics picked up on anachronisms in the film, not least involving the two main stars. + +Cameron sketched Jack's nude portrait of Rose for a scene which he feels has the backdrop of repression. "You know what it means for her, the freedom she must be feeling. It's kind of exhilarating for that reason," he said. The nude scene was DiCaprio and Winslet's first scene together. "It wasn't by any kind of design, although I couldn't have designed it better. There's a nervousness and an energy and a hesitance in them," Cameron stated. "They had rehearsed together, but they hadn't shot anything together. If I'd had a choice, I probably would have preferred to put it deeper into the body of the shoot." Cameron said he and his crew "were just trying to find things to shoot" because the big set "wasn't ready for months, so we were scrambling around trying to fill in anything we could get to shoot." After seeing the scene on film, Cameron felt it worked out considerably well.Other times on the set were not as smooth. The shoot was an arduous experience that "cemented Cameron's formidable reputation as 'the scariest man in Hollywood'. 
He became known as an uncompromising, hard-charging perfectionist" and a "300-decibel screamer, a modern-day Captain Bligh with a megaphone and walkie-talkie, swooping down into people's faces on a 162ft crane". Winslet chipped a bone in her elbow during filming and had been worried that she would drown in the 17-million-gallon water tank in which the ship was to be sunk. "There were times when I was genuinely frightened of him. Jim has a temper like you wouldn't believe," she said. "'God damn it!' he would yell at some poor crew member, 'that's exactly what I didn't want!'" Her co-star, Bill Paxton, was familiar with Cameron's work ethic from his earlier experience with him. "There were a lot of people on the set. Jim is not one of those guys who has the time to win hearts and minds," he said. The crew felt Cameron had an evil alter ego and so nicknamed him "Mij" (Jim spelled backwards). In response to the criticism, Cameron stated, "Film-making is war. A great battle between business and aesthetics." On August 9, 1996, during the Akademik Mstislav Keldysh shoot in Canada, an unknown person, likely a crew member, put the dissociative drug PCP into the soup that Cameron and various others ate one night in Dartmouth, Nova Scotia. It sent more than 50 people to the hospital, including Paxton. "There were people just rolling around, completely out of it. Some of them said they were seeing streaks and psychedelics," said actor Lewis Abernathy. Cameron managed to vomit before the drug took a full hold. Abernathy was shocked at the way he looked. "One eye was completely red, like the Terminator eye. A pupil, no iris, beet red. The other eye looked like he'd been sniffing glue since he was four." The Nova Scotia Department of Health confirmed that the soup had contained PCP on August 27, and the Halifax Regional Police Service announced a criminal investigation the next day. The investigation was closed in February 1999. The person behind the poisoning was never caught. +The filming schedule was intended to last 138 days but grew to 160 (officially wrapping on March 23, 1997). Many cast members came down with colds, flu, or kidney infections after spending hours in cold water, including Winslet. In the end, she decided she would not work with Cameron again unless she earned "a lot of money". Several others left the production, and three stuntmen broke their bones, but the Screen Actors Guild decided, following an investigation, that nothing was inherently unsafe about the set. Additionally, DiCaprio said there was no point when he felt he was in danger during filming. Cameron believed in a passionate work ethic and never apologized for the way he ran his sets, although he acknowledged: "I'm demanding, and I'm demanding on my crew. In terms of being kind of militaresque, I think there's an element of that in dealing with thousands of extras and big logistics and keeping people safe. I think you have to have a fairly strict methodology in dealing with a large number of people." +The costs of filming Titanic ballooned and eventually reached $200 million, a bit over $1 million per minute of screen time. Fox executives panicked and suggested an hour of specific cuts from the three-hour film. They argued the extended length would mean fewer showings, thus less revenue, even though long epics are more likely to help directors win Oscars. Cameron refused, telling Fox, "You want to cut my movie? You're going to have to fire me! You want to fire me? You're going to have to kill me!"
The executives did not want to start over, because it would mean the loss of their entire investment, but they also initially rejected Cameron's offer of forfeiting his share of the profits as an empty gesture, as they predicted profits would be unlikely. Worried about the mounting costs, Fox wanted to find a partner studio to co-finance the film; Fox approached Paramount Pictures in May 1996, and the two studios agreed to split the costs and distribution rights. Fox retained the international distribution rights and sold the domestic rights to Paramount in return for $65 million, in an effort to recoup their investment. Cameron explained that forfeiting his share was complex. "... the short version is that the film cost proportionally much more than T2 and True Lies. Those films went up seven or eight percent from the initial budget. Titanic also had a large budget to begin with, but it went up a lot more," he said. "As the producer and director, I take responsibility for the studio that's writing the checks, so I made it less painful for them. I did that on two different occasions. They didn't force me to do it; they were glad that I did." + + +== Post-production == + + +=== Effects === +Cameron wanted to push the boundary of special effects with his film, and enlisted Digital Domain and Pacific Data Images to continue the developments in digital technology which the director pioneered while working on The Abyss and Terminator 2: Judgment Day. Many previous films about Titanic shot water in slow motion, which did not look wholly convincing. Cameron encouraged his crew to shoot their 45-foot-long (14 m) miniature of the ship as if "we're making a commercial for the White Star Line". Afterwards, digital water and smoke were added, as were extras captured on a motion capture stage. Visual effects supervisor Rob Legato scanned the faces of many actors, including himself and his children, for the digital extras and stuntmen. There was also a 65-foot-long (20 m) model of the ship's stern that could break in two repeatedly, the only miniature to be used in water. For scenes set in the ship's engines, footage of the SS Jeremiah O'Brien's engines was composited with miniature support frames, and actors shot against a greenscreen. In order to save money, the first-class lounge was a miniature set incorporated into a greenscreen backdrop behind the actors. The miniature of the lounge would later be crushed to simulate the destruction of the room, and a scale model of a first-class corridor was flooded with jets of water while the camera pans out. + +An enclosed 5,000,000-US-gallon (19,000,000 L) tank was used for sinking interiors, in which the entire set could be tilted into the water. In order to sink the Grand Staircase, 90,000 US gallons (340,000 L) of water were dumped into the set as it was lowered into the tank. Unexpectedly, the waterfall ripped the staircase from its steel-reinforced foundations, although no one was hurt. The 744-foot-long (227 m) exterior of Titanic had its first half lowered into the tank, but as the heaviest part of the ship it acted as a shock absorber against the water; to get the set into the water, Cameron had much of the set emptied and even smashed some of the promenade windows himself. After submerging the dining saloon, three days were spent shooting Lovett's ROV traversing the wreck in the present.
The post-sinking scenes in the freezing Atlantic were shot in a 350,000-US-gallon (1,300,000 L) tank, where the frozen corpses were created by applying to the actors a powder that crystallized when exposed to water, and by coating their hair and clothes with wax. The climactic scene, which features the breakup of the ship directly before it sinks as well as its final plunge to the bottom of the Atlantic, involved a tilting full-sized set, 150 extras, and 100 stunt performers. Cameron criticized previous Titanic films for depicting the liner's final plunge as a graceful slide underwater. He "wanted to depict it as the terrifyingly chaotic event that it really was". When carrying out the sequence, people needed to fall off the increasingly tilting deck, plunging hundreds of feet below and bouncing off railings and propellers on the way down. A few attempts to film this sequence with stunt people resulted in some minor injuries, and Cameron halted the more dangerous stunts. The risks were eventually minimized "by using computer-generated people for the dangerous falls". A Linux-based operating system was utilized for the creation of the effects. + + +=== Editing === +There was one "crucial historical fact" Cameron chose to omit from the film – the SS Californian was close to the Titanic the night she sank but had turned off its radio for the night, did not hear her crew's SOS calls, and did not respond to their distress flares. "Yes, the [SS] Californian. That wasn't a compromise to mainstream filmmaking. That was really more about emphasis, creating an emotional truth to the film," stated Cameron. He said there were aspects of retelling the sinking that seemed important in pre- and post-production, but turned out to be less important as the film evolved. "The story of the Californian was in there; we even shot a scene of them switching off their Marconi radio set," said Cameron. "But I took it out. It was a clean cut, because it focuses you back onto that world. If Titanic is powerful as a metaphor, as a microcosm, for the end of the world in a sense, then that world must be self-contained." During the first assembly cut, Cameron altered the planned ending, which had given resolution to Brock Lovett's story. In the original version of the ending, Brock and Lizzy see the elderly Rose at the stern of the boat and fear she is going to commit suicide. Rose then reveals that she had the "Heart of the Ocean" diamond all along but never sold it, in order to live on her own without Cal's money. She tells Brock that life is priceless and throws the diamond into the ocean, after allowing him to hold it. After accepting that treasure is worthless, Brock laughs at his stupidity. Rose then goes back to her cabin to sleep, whereupon the film ends in the same way as the final version. In the editing room, Cameron decided that by this point, the audience would no longer be interested in Brock Lovett and cut the resolution to his story, so that Rose is alone when she drops the diamond. He also did not want to disrupt the audience's melancholy after the Titanic's sinking. Paxton agreed that his scene with Brock's epiphany and laugh was unnecessary, stating that "I would have shot heroin to make the scene work better ... you didn't really need anything from us. Our job was done by then ...
If you're smart and you take the ego and the narcissism out of it, you'll listen to the film, and the film will tell you what it needs and what it does not need".The version used for the first test screening featured a fight between Jack and Lovejoy which takes place after Jack and Rose escape into the flooded dining saloon, but the test audiences disliked it. The scene was written to give the film more suspense, and featured Cal (falsely) offering to give Lovejoy, his valet, the "Heart of the Ocean" if he can get it from Jack and Rose. Lovejoy goes after the pair in the sinking first-class dining room. Just as they are about to escape him, Lovejoy notices Rose's hand slap the water as it slips off the table behind which she is hiding. In revenge for framing him for the "theft" of the necklace, Jack attacks him and smashes his head against a glass window, which explains the gash on Lovejoy's head that can be seen when he dies in the completed version of the film. In their reactions to the scene, test audiences said it would be unrealistic to risk one's life for wealth, and Cameron cut it for this reason, as well as for timing and pacing reasons. Many other scenes were cut for similar reasons. + + +=== Heart of the Ocean === +For the Heart of the Ocean design, London-based jewelers Asprey & Garrard used cubic zirconias set in white gold to create an Edwardian-style necklace to be used as a prop in the film. The studio designed and produced three variations, very similar but unique and distinguishable in character. Two of them were used in the film while the third went unused until after the film had been released. The three necklaces are commonly known as the original prop, the J. Peterman necklace and the Asprey necklace. +The third and final design was not used in the film. After the film's success, Asprey & Garrard were commissioned to create an authentic Heart of the Ocean necklace using the original design. The result was a platinum-set, 171-carat (34.2 g) heart-shaped Ceylon sapphire surrounded by 103 diamonds. This design featured a much larger inverted pear-shaped Ceylon sapphire with a subtle cleft to resemble a heart. The chain for this necklace also featured a mix of round, pear, and marquise cut white diamonds. The bail also featured a heart cut white diamond with another round cut diamond attached to an inverted pear shape diamond which was then attached to the cage of the main stone. The necklace was donated to Sotheby's auction house in Beverly Hills for an auction benefiting the Diana, Princess of Wales Memorial Fund and Southern California's Aid For AIDS. It was sold to an unidentified Asprey client for $1.4 million, under the agreement that Dion would wear it two nights later at the 1998 Academy Awards ceremony. This necklace has since not been made available for public viewing. + + +== Soundtrack == + +Cameron wrote Titanic while listening to the work of the Irish new-age musician Enya. After Enya declined an invitation to compose for the film, Cameron instead chose James Horner. The two had parted ways after a tumultuous working experience on Aliens, but Titanic cemented a successful collaboration that lasted until Horner's death. For the vocals heard throughout the film, Horner chose the Norwegian singer Sissel Kyrkjebø, commonly known as "Sissel". Horner knew Sissel from her album Innerst i sjelen, and particularly liked how she sang "Eg veit i himmerik ei borg" ("I Know in Heaven There Is a Castle"). 
He tried around 30 singers before choosing Sissel.Horner wrote the end theme, "My Heart Will Go On", in secret with Will Jennings because Cameron did not want any songs in the film. Céline Dion agreed to record a demo at the persuasion of her husband René Angélil. Horner waited until Cameron was in an appropriate mood before presenting him with the song. After playing it several times, Cameron declared his approval, although worried that he would have been criticized for "going commercial at the end of the movie". Cameron also wanted to appease anxious studio executives and "saw that a hit song from his movie could only be a positive factor in guaranteeing its completion". + + +== Release == + + +=== Initial screening === +Distribution for the film was split between Paramount and Fox; the former handling the distribution in the United States and Canada, and the latter handling the international release. Both studios expected Cameron to complete the film for a release on July 2, 1997. The film was to be released on this date "to exploit the lucrative summer season ticket sales when blockbuster films usually do better". In April, Cameron said the film's special effects were too complicated and that releasing the film on that date would not be possible. The studios considered pushing the film to late July or the first week of August, but Harrison Ford, whose film Air Force One was to be released on July 25, was reported to have informed Paramount, which had produced his lucrative Indiana Jones and Jack Ryan franchises, that he would never work with the studio again if they released Titanic so close to his own film. On May 29, 1997, Paramount pushed back the release date to December 19, 1997. "This fueled speculation that the film itself was a disaster." A preview screening in Minneapolis on July 14 "generated positive reviews" and "[c]hatter on the internet was responsible for more favorable word of mouth about the [film]". This eventually led to more positive media coverage.Cameron refused to hold the film's world premiere in Los Angeles. Paramount disagreed with Cameron's decision, but Fox acquiesced and went ahead and held the premiere on November 1, 1997, at the Tokyo International Film Festival, where reaction was described as "tepid" by The New York Times. Positive reviews started to appear back in the United States; the official Hollywood premiere occurred on December 14, 1997, where "the big movie stars who attended the opening were enthusiastically gushing about the film to the world media". + + +== Box office == +Including revenue from the 2012, 2017 and 2023 reissues, Titanic earned $674.3 million in North America and $1.583 billion in other countries, for a worldwide total of $2.257 billion. It became the highest-grossing film of all time worldwide in 1998, beating Jurassic Park (1993). The film remained so for twelve years, until Avatar (2009), also written and directed by Cameron, surpassed it in 2010. On March 1, 1998, it became the first film to earn more than $1 billion worldwide and on the weekend April 13–15, 2012—a century after the original vessel's foundering, Titanic became the second film to cross the $2 billion threshold during its 3D re-release. Box Office Mojo estimates that Titanic is the fifth-highest-grossing film of all time in North America when adjusting for ticket price inflation. 
The site also estimates that the film sold over 128 million tickets in the US in its initial theatrical run. Titanic was the first foreign-language film to succeed in India, which claims to have the largest movie-going audience in the world. A Hindustan Times report attributes this to the film's similarities and shared themes with most Bollywood films. + + +=== Initial theatrical run === +The film received steady attendance after opening in North America on Friday, December 19, 1997. By the end of that same weekend, theaters were beginning to sell out. The film earned $8,658,814 on its opening day and $28,638,131 over the opening weekend from 2,674 theaters, averaging about $10,710 per venue, and ranking number one at the box office, ahead of Mouse Hunt, Scream 2 and the eighteenth James Bond film, Tomorrow Never Dies. It would go on to surpass The Godfather Part III's record for having the highest Christmas Day gross, generating a total of $9.2 million. For its second weekend, the film made $35.6 million, making it the biggest December weekend gross, surpassing Scream 2. By New Year's Day, Titanic had made over $120 million, had increased in popularity and theaters continued to sell out. In just 44 days, it became the fastest film to approach the $300 million mark at the domestic box office, surpassing the former record held by Jurassic Park, which took 67 days to do so. Titanic would hold this record until 1999 when it was taken by Star Wars: Episode I – The Phantom Menace. Its highest-grossing single day was Saturday, February 14, 1998, on which it earned $13,048,711, more than eight weeks after its North American debut. On March 14, it surpassed Star Wars as the highest-grossing film ever in North America. It stayed at number one for 15 consecutive weeks in North America, a record for any film. In April 1998, the film was overtaken at number one by Lost in Space, dropping into second place. The film stayed in theaters in North America for almost 10 months before finally closing on Thursday, October 1, 1998, with a final domestic gross of $600,788,188, equivalent to $1,095.2 million in 2022. Outside North America, the film made double its North American gross, generating $1,242,413,080 and accumulating a grand total of $1,843,201,268 worldwide from its initial theatrical run. + + +=== Commercial analysis === +Before Titanic's release, various film critics predicted the film would be a significant disappointment at the box office, especially since it was the most expensive film ever made at the time. When it was shown to the press in autumn of 1997, "it was with massive forebodings", since the "people in charge of the screenings believed they were on the verge of losing their jobs – because of this great albatross of a picture on which, finally, two studios had to combine to share the great load of its making". Cameron also thought he was "headed for disaster" at one point during filming. "We labored the last six months on Titanic in the absolute knowledge that the studio would lose $100 million. It was a certainty," he stated. As the film neared release, "particular venom was spat at Cameron for what was seen as his hubris and monumental extravagance". A film critic for the Los Angeles Times wrote that "Cameron's overweening pride has come close to capsizing this project" and that the film was "a hackneyed, completely derivative copy of old Hollywood romances".
+ +When the film became a success, with an unprecedented box-office performance, it was credited for being a love story that captured its viewers' emotions. The film was playing on 3,200 screens ten weeks after it opened, and out of its fifteen straight weeks on top of the charts, jumped 43% in total sales in its ninth week of release. It earned over $20 million for each of its first 10 weekends, and after 14 weeks was still bringing in more than $1 million on weekdays. 20th Century Fox estimated that seven percent of American teenage girls had seen Titanic twice by its fifth week. Although young women who saw the film several times and subsequently caused "Leo-Mania" were often credited with having primarily propelled the film to its all-time box office record, other reports have attributed the film's success to positive word of mouth and repeat viewership due to the love story combined with the ground-breaking special effects. The Hollywood Reporter estimated that after a combined production and promotion cost of $487 million, the film turned a net profit of $1.4 billion, with a modern profit of as much as $4 billion after ancillary sources.Titanic's impact on men has also been especially credited. It is considered one of the films that make men cry, with MSNBC's Ian Hodder stating that men admire Jack's sense of adventure and his ambitious behavior to win over Rose, which contributes to their emotional attachment to Jack. The film's ability to make men cry was briefly parodied in the 2009 film Zombieland, where character Tallahassee (Woody Harrelson), when recalling the death of his young son, states: "I haven't cried like that since Titanic."In 2010, the BBC analyzed the stigma over men crying during Titanic and films in general. "Middle-aged men are not 'supposed' to cry during movies," stated Finlo Rohrer of the website, citing the ending of Titanic as having generated such tears, adding that "men, if they have felt weepy during [this film], have often tried to be surreptitious about it." Professor Mary Beth Oliver, of Penn State University, stated, "For many men, there is a great deal of pressure to avoid expression of 'female' emotions like sadness and fear. From a very young age, males are taught that it is inappropriate to cry, and these lessons are often accompanied by a great deal of ridicule when the lessons aren't followed." Rohrer said, "Indeed, some men who might sneer at the idea of crying during Titanic will readily admit to becoming choked up during Saving Private Ryan or Platoon." For men in general, "the idea of sacrifice for a 'brother' is a more suitable source of emotion".Scott Meslow of The Atlantic stated while Titanic initially seems to need no defense, given its success, it is considered a film "for 15-year-old girls" by its main detractors. He argued that dismissing Titanic as fodder for teenage girls fails to consider the film's accomplishment: "that [this] grandiose, 3+ hour historical romantic drama is a film for everyone—including teenage boys." Meslow stated that despite the film being ranked high by males under the age of 18, matching the ratings for teenage boy-targeted films like Iron Man, it is common for boys and men to deny liking Titanic. He acknowledged his own rejection of the film as a child while secretly loving it. "It's this collection of elements—the history, the romance, the action—that made (and continues to make) Titanic an irresistible proposition for audiences of all ages across the globe," he stated. 
"Titanic has flaws, but for all its legacy, it's better than its middlebrow reputation would have you believe. It's a great movie for 15-year-old girls, but that doesn't mean it's not a great movie for everyone else too."Quotes in the film aided its popularity. Titanic's catchphrase "I'm the king of the world!" became one of the film industry's more popular quotations. According to Richard Harris, a psychology professor at Kansas State University, who studied why people like to cite films in social situations, using film quotations in everyday conversation is similar to telling a joke and a way to form solidarity with others. "People are doing it to feel good about themselves, to make others laugh, to make themselves laugh", he said.Cameron explained the film's success as having significantly benefited from the experience of sharing. "When people have an experience that's very powerful in the movie theatre, they want to go share it. They want to grab their friend and bring them, so that they can enjoy it," he said. "They want to be the person to bring them the news that this is something worth having in their life. That's how Titanic worked." Media Awareness Network stated, "The normal +repeat viewing rate for a blockbuster theatrical film is about 5%. The repeat rate for Titanic was over 20%." The box office receipts "were even more impressive" when factoring in "the film's 3-hour-and-14-minute length meant that it could only be shown three times a day compared to a normal movie's four showings". In response to this, "[m]any theatres started midnight showings and were rewarded with full houses until almost 3:30 am".Titanic held the record for box office gross for 12 years. Cameron's follow-up film, Avatar, was considered the first film with a genuine chance at surpassing its worldwide gross, and did so in 2010. Various explanations for why the film was able to successfully challenge Titanic were given. For one, "Two-thirds of Titanic's haul was earned overseas, and Avatar [tracked] similarly ... Avatar opened in 106 markets globally and was no. 1 in all of them" and the markets "such as Russia, where Titanic saw modest receipts in 1997 and 1998, are white-hot today" with "more screens and moviegoers" than ever before. Brandon Gray, president of Box Office Mojo, said that while Avatar may beat Titanic's revenue record, the film is unlikely to surpass Titanic in attendance. "Ticket prices were about $3 cheaper in the late 1990s." In December 2009, Cameron had stated, "I don't think it's realistic to try to topple Titanic off its perch. Some pretty good movies have come out in the last few years. Titanic just struck some kind of chord." In a January 2010 interview, he gave a different take on the matter once Avatar's performance was easier to predict. "It's gonna happen. It's just a matter of time," he said.Author Alexandra Keller, when analyzing Titanic's success, stated that scholars could agree that the film's popularity "appears dependent on contemporary culture, on perceptions of history, on patterns of consumerism and globalization, as well as on those elements experienced filmgoers conventionally expect of juggernaut film events in the 1990s – awesome screen spectacle, expansive action, and, more rarely seen, engaging characters and epic drama." + + +== Critical reception == + + +=== Initial === +Titanic garnered mostly positive reviews from film critics, and was positively reviewed by audiences and scholars, who commented on the film's cultural, historical, and political impacts. 
On review aggregator website Rotten Tomatoes, the film has an approval rating of 88% based on 250 reviews, with an average rating of 8/10. The site's critical consensus reads, "A mostly unqualified triumph for James Cameron, who offers a dizzying blend of spectacular visuals and old-fashioned melodrama." Metacritic, which assigned a weighted average score of 75 out of 100, based on 35 critics, reports the film has "generally favorable reviews". Audiences polled by CinemaScore gave the film a rare "A+" grade, one of fewer than 60 films in the history of the service from 1982 to 2011 to earn the score.

With regard to the film's overall design, Roger Ebert stated: "It is flawlessly crafted, intelligently constructed, strongly acted, and spellbinding ... Movies like this are not merely difficult to make at all, but almost impossible to make well." He credited the "technical difficulties" with being "so daunting that it's a wonder when the filmmakers are also able to bring the drama and history into proportion" and "found [himself] convinced by both the story and the sad saga". He named it his ninth best film of 1997. On the television program Siskel & Ebert, the film received "two thumbs up" and was praised for its accuracy in recreating the ship's sinking; Ebert described the film as "a glorious Hollywood epic" and "well worth the wait," and Gene Siskel found Leonardo DiCaprio "captivating".

James Berardinelli stated: "Meticulous in detail, yet vast in scope and intent, Titanic is the kind of epic motion picture event that has become a rarity. You don't just watch Titanic, you experience it." It was named his second best film of 1997. Joseph McBride of Boxoffice Magazine concluded: "To describe Titanic as the greatest disaster movie ever made is to sell it short. James Cameron's recreation of the 1912 sinking of the 'unsinkable' liner is one of the most magnificent pieces of serious popular entertainment ever to emanate from Hollywood."

The romantic and emotionally charged aspects of the film were equally praised. Andrew L. Urban of Urban Cinefile said: "You will walk out of Titanic not talking about budget or running time, but of its enormous emotive power, big as the engines of the ship itself, determined as its giant propellers to gouge into your heart, and as lasting as the love story that propels it." Owen Gleiberman of Entertainment Weekly described the film as "a lush and terrifying spectacle of romantic doom. Writer-director James Cameron has restaged the defining catastrophe of the early 20th century on a human scale of such purified yearning and dread that he touches the deepest levels of popular moviemaking." Janet Maslin of The New York Times commented that "Cameron's magnificent Titanic is the first spectacle in decades that honestly invites comparison to Gone With the Wind." Adrian Turner of Radio Times awarded it four stars out of five, stating "Cameron's script wouldn't have sustained Clark Gable and Vivien Leigh for 80 minutes, but, somehow, he and his magical cast revive that old-style studio gloss for three riveting hours. Titanic is a sumptuous assault on the emotions, with a final hour that fully captures the horror and the freezing, paralysing fear of the moment. And there are single shots, such as an awesome albatross-like swoop past the steaming ship, when you sense Cameron hugging himself with the fun of it all."

Titanic suffered backlash in addition to its success. Some reviewers felt that while the visuals were spectacular, the story and dialogue were weak.
Richard Corliss of Time magazine wrote a mostly negative review, criticizing the lack of interesting emotional elements. Kenneth Turan's review in the Los Angeles Times was particularly scathing. Dismissing the emotive elements, he stated, "What really brings on the tears is Cameron's insistence that writing this kind of movie is within his abilities. Not only is it not, it is not even close." He later argued that the only reason that the film won Oscars was because of its box office total. Barbara Shulgasser of The San Francisco Examiner gave Titanic one star out of four, citing a friend as saying, "The number of times in this unbelievably badly written script that the two [lead characters] refer to each other by name was an indication of just how dramatically the script lacked anything more interesting for the actors to say."


=== Retrospective ===
According to Dalin Rowell of /Film, "With complaints about its lengthy runtime, observations that certain characters could have easily fit onto pieces of floating furniture, and jokes about its melodramatic nature, Titanic is no stranger to modern-day criticism." In 2002, filmmaker Robert Altman called it "the most dreadful piece of work I've ever seen in my entire life". Similarly, French New Wave director and former Cahiers du Cinéma editor Jacques Rivette referred to it as "garbage" in a 1998 interview with Frédéric Bonnaud and was particularly critical of Winslet's performance, saying she was "unwatchable, the most slovenly girl to appear on the screen in a long, long time." In 2003, the film topped a poll of "Best Film Endings", but it also topped a poll by Film 2003 as "the worst movie of all time".

In his 2012 study of the lives of the passengers on the Titanic, historian Richard Davenport-Hines said, "Cameron's film diabolized rich Americans and educated English, anathematizing their emotional restraint, good tailoring, punctilious manners and grammatical training, while it made romantic heroes of the poor Irish and the unlettered." The British film magazine Empire reduced their rating of the film from the maximum five stars and an enthusiastic review, to four stars with a less positive review in a later edition, to accommodate the tastes of its readers, who wanted to disassociate themselves from the hype surrounding the film, and the reported activities of its fans, such as those attending multiple screenings. In addition to this, positive and negative parodies and other such spoofs of the film abounded and were circulated on the internet, often inspiring passionate responses from fans of various opinions of the film. Benjamin Willcock of DVDActive.com did not understand the backlash or the passionate hatred for the film. "What really irks me ...," he said, "are those who make nasty stabs at those who do love it." Willcock stated, "I obviously don't have anything against those who dislike Titanic, but those few who make you feel small and pathetic for doing so (and they do exist, trust me) are way beyond my understanding and sympathy."

In 1998, Cameron responded to the backlash, and Kenneth Turan's review in particular, by writing "Titanic is not a film that is sucking people in with flashy hype and spitting them out onto the street feeling let down and ripped off. They are returning again and again to repeat an experience that is taking a 3-hour and 14-minute chunk out of their lives, and dragging others with them, so they can share the emotion."
Cameron emphasized that people of all ages (ranging from 8 to 80) and from all backgrounds were "celebrating their own essential humanity" by seeing it. He described the script as earnest and straightforward, and said it intentionally "incorporates universals of human experience and emotion that are timeless – and familiar because they reflect our basic emotional fabric" and that the film was able to succeed in this way by dealing with archetypes. He did not see it as pandering. "Turan mistakes archetype for cliché," he said. "I don't share his view that the best scripts are only the ones that explore the perimeter of human experience, or flashily pirouette their witty and cynical dialogue for our admiration."

In 2000, Almar Haflidason of the BBC wrote that "the critical knives were out long before James Cameron's Titanic was complete. Spiralling costs that led to it becoming the most expensive motion picture of the 20th Century, and a cast without any big stars seemed to doom the film before release. But box office and audience appreciation proved Cameron right and many critics wrong." He added that "the sinking of the great ship is no secret, yet for many exceeded expectations in sheer scale and tragedy" and that "when you consider that [the film] tops a bum-numbing three-hour running time, then you have a truly impressive feat of entertainment achieved by Cameron". Empire eventually reinstated its original five-star rating of the film, commenting: "It should be no surprise[,] then[,] that it became fashionable to bash James Cameron's Titanic at approximately the same time it became clear that this was the planet's favourite film. Ever."

In 2017, on the 20th anniversary of its release, the film was selected for preservation in the United States National Film Registry by the Library of Congress as being "culturally, historically, or aesthetically significant". It was listed among the 100 best films in an Empire poll and in a later poll of members of the film industry. In 2021, Dalin Rowell of /Film ranked it the third-best film of Cameron's career, stating that it is "easily one of his best films, simply because it defied the odds", and considering it "a legitimately remarkable achievement — one that, despite its large budget, has a humble, earnest center. Even with all of the jokes the Internet loves to throw its way, Titanic demonstrates that Cameron is truly capable of everything he can imagine."


== Accolades ==

Titanic began its awards sweep starting with the Golden Globes, winning four: Best Motion Picture – Drama, Best Director, Best Original Score, and Best Original Song. Kate Winslet and Gloria Stuart were also nominees. The film garnered fourteen Academy Award nominations, tying the record set in 1950 by Joseph L. Mankiewicz's All About Eve, and won eleven: Best Picture (the second film about the Titanic to win that award, after 1933's Cavalcade), Best Director, Best Art Direction, Best Cinematography, Best Visual Effects, Best Film Editing, Best Costume Design, Best Sound (Gary Rydstrom, Tom Johnson, Gary Summers, Mark Ulano), Best Sound Effects Editing, Best Original Dramatic Score, Best Original Song. Kate Winslet, Gloria Stuart and the make-up artists were the three nominees that did not win, ultimately losing to Helen Hunt in As Good as It Gets, Kim Basinger in L.A. Confidential, and Men in Black respectively. James Cameron's original screenplay and Leonardo DiCaprio were not nominees. It was the second film to receive eleven Academy Awards, after Ben-Hur.
The Lord of the Rings: The Return of the King would also match this record in 2004.

Titanic won the 1997 Academy Award for Best Original Song, as well as four Grammy Awards for Record of the Year, Song of the Year, Best Song Written Specifically for a Motion Picture or Television, and Best Female Pop Vocal Performance. The film's soundtrack became the best-selling primarily orchestral soundtrack of all time, and became a worldwide success, spending sixteen weeks at number-one in the United States, and was certified diamond for over eleven million copies sold in the United States alone. The soundtrack also became the best-selling album of 1998 in the U.S. "My Heart Will Go On" won the Grammy Awards for Best Song Written Specifically for a Motion Picture or for Television.
The film also won various awards outside the United States, including the Awards of the Japanese Academy as the Best Foreign Film of the Year. Titanic eventually won nearly ninety awards and had an additional forty-seven nominations from various award-giving bodies around the world. Additionally, the book about the making of the film was at the top of The New York Times' bestseller list for several weeks, "the first time that such a tie-in book had achieved this status".

Since its release, Titanic has appeared on the American Film Institute's award-winning 100 Years ... series. So far, it has ranked on the following six lists:


== Home media ==
Titanic was released worldwide in widescreen and pan and scan formats on VHS on September 1, 1998. More than $50 million was spent to market the home video release of the film. Both VHS formats were also made available in a deluxe boxed gift set with a mounted filmstrip and six lithograph prints from the movie. In the first 3 months, the film sold 25 million copies in North America with a total sales value of $500 million, becoming the best-selling live-action video, beating Independence Day. In that time, it sold 58 million copies worldwide, outselling The Lion King for a total worldwide revenue of $995 million. By March 2005, the film had sold 8 million DVD and 59 million VHS units. In the United Kingdom, the film sold 1.1 million copies on its first day of release, making it the country's fastest-selling home video release. It would hold this record until it was surpassed by Harry Potter and the Sorcerer's Stone in May 2002 when that film sold 1.2 million home video units during its first day. Within the first week of release, Titanic quickly beat The Full Monty, selling a total of 1.8 million home video copies.

A DVD version was released on August 31, 1999, in a widescreen-only (non-anamorphic) single-disc edition with no special features other than a theatrical trailer. Cameron stated at the time that he intended to release a special edition with extra features later. This release became the best-selling DVD of 1999 and early 2000, becoming the first DVD ever to sell one million copies. At the time, less than 5% of all U.S. homes had a DVD player. "When we released the original Titanic DVD, the industry was much smaller, and bonus features were not the standard they are now," said Meagan Burrows, Paramount's president of domestic home entertainment, which made the film's DVD performance even more impressive.

Titanic was re-released to DVD on October 25, 2005, when a three-disc Special Collector's Edition was made available in the United States and Canada.
This edition contained a newly restored transfer of the film, a 6.1 DTS-ES Discrete surround sound mix and various special features. In PAL regions, two-disc and four-disc variants were released, marketed as the Special Edition and Deluxe Collector's Edition respectively. They were released in the United Kingdom on November 7, 2005. A limited 5-disc set of the film, under the title Deluxe Limited Edition, was also released only in the United Kingdom, with only 10,000 copies manufactured. The fifth disc contains Cameron's documentary Ghosts of the Abyss, which was distributed by Walt Disney Pictures. Unlike the individual release of Ghosts of the Abyss, which contained two discs, only the first disc was included in the set. In 2007, for the film's tenth anniversary, a 10th Anniversary Edition was released on DVD, which consists of the first two discs from the three-disc 2005 set containing the movie and the special features on those discs.

The film was released on Blu-ray and Blu-ray 3D on September 10, 2012. The 3D presentation of the film is split over two discs and is also THX-certified. Special features on another disc included many of those featured on the 2005 Special Collector's Edition DVD along with two new documentaries titled "Reflections on Titanic" and "Titanic: The Final Word with James Cameron." The latter aired on National Geographic on April 9, 2012 and was executive produced by Cameron.


== Re-releases ==


=== 3D conversion ===
A 2012 3D re-release was created by re-mastering the original to 4K resolution and post-converting to stereoscopic 3D format. The Titanic 3D version took 60 weeks and $18 million to produce, including the 4K restoration. The 3D conversion was performed by Stereo D. Digital 2D and 2D IMAX versions were also struck from the new 4K master created in the process. The only scene entirely redone for the re-release was Rose's view of the night sky at sea on the morning of April 15, 1912. The scene was replaced with an accurate view of the night-sky star pattern, including the Milky Way, adjusted for the location in the North Atlantic Ocean in April 1912. The change was prompted by the astrophysicist Neil deGrasse Tyson, who had criticized the unrealistic star pattern. He agreed to send Cameron a corrected view of the sky, which was the basis of the new scene.

The 3D version of Titanic premiered at the Royal Albert Hall in London on March 27, 2012, with James Cameron and Kate Winslet in attendance, and entered general release on April 4, 2012, six days before the centenary of Titanic embarking on her maiden voyage.

Rolling Stone film critic Peter Travers rated the reissue 3+1⁄2 stars out of 4, explaining he found it "pretty damn dazzling". He said, "The 3D intensifies Titanic. You are there. Caught up like never before in an intimate epic that earns its place in the movie time capsule." Writing for Entertainment Weekly, Owen Gleiberman gave the film an A grade. He wrote, "For once, the visuals in a 3-D movie don't look darkened or distracting. They look sensationally crisp and alive." Richard Corliss of Time, who was very critical in 1997, remained in the same mood: "I had pretty much the same reaction: fitfully awed, mostly water-logged." In regard to the 3D effects, he noted the "careful conversion to 3D lends volume and impact to certain moments ... [but] in separating the foreground and background of each scene, the converters have carved the visual field into discrete, not organic, levels."
Ann Hornaday for The Washington Post found herself asking "whether the film's twin values of humanism and spectacle are enhanced by Cameron's 3-D conversion, and the answer to that is: They aren't." She added that the "3-D conversion creates distance where there should be intimacy, not to mention odd moments in framing and composition."

The film grossed an estimated $4.7 million on the first day of its re-release in North America (including midnight preview showings) and went on to make $17.3 million over the weekend, finishing in third place behind The Hunger Games and American Reunion. Outside North America it earned $35.2 million, finishing second, and it improved on its performance the following weekend by topping the box office with $98.9 million. China has proven to be its most successful territory, where it earned $11.6 million on its opening day, going on to earn a record-breaking $67 million in its opening week and taking more money in the process than it did in the entirety of its original theatrical run.

The reissue ultimately earned $343.4 million worldwide, with $145 million coming from China and $57.8 million from Canada and the United States. With a worldwide box office of nearly $350 million, the 3D re-release of Titanic remains the highest grossing re-released film of all time, ahead of The Lion King, Star Wars, and Avatar.

The 3D conversion of the film was also released in the 4DX format in selected international territories, which allows the audience to experience the film's environment using motion, wind, fog, lighting and scent-based special effects.


=== 20th anniversary ===
For the 20th anniversary of the film, Titanic was re-released in cinemas in Dolby Vision (in both 2D and 3D) for one week beginning December 1, 2017.


=== 25th anniversary ===
Titanic was re-released in theaters by Paramount domestically and Walt Disney Studios Motion Pictures (through the 20th Century Studios label) internationally on February 10, 2023, in a remastered 3D 4K HDR render, with high frame rate, as part of the film's 25th anniversary. For this version, the international prints update 20th Century's logo with the studio's current name, as a result of Disney's 2019 acquisition of the studio.


== Titanic Live ==
Titanic Live was a live performance of James Horner's original score by a 130-piece orchestra, choir and Celtic musicians, accompanying a showing of the film. In April 2015, Titanic Live premiered at the Royal Albert Hall, London, where the 2012 3D re-release had premiered.


== Merchandise ==
In 1998, an official tie-in CD-ROM game was released, titled James Cameron's Titanic Explorer. The educational game covered the history of the vessel's construction, maiden voyage and sinking, as well as the discovery and exploration of the wreck. The game included deleted footage from the film and extensive 360-degree video footage of the film's sets.
In 2020, a board game based on the film, titled Titanic: The Game, was released by Spin Master Games.


== See also ==
List of Academy Award records
Titanic: Music from the Motion Picture


== Notes ==


== References ==


== Further reading ==


== External links ==

Official website
Titanic at IMDb
Titanic at the TCM Movie Database
Titanic at AllMovie
Titanic at The Numbers
Screenplay of Titanic at The Internet Movie Script Database
Paramount Movies - Titanic
YouTube video detailing model construction on YouTube
\ No newline at end of file
diff --git a/tools/evaluation/02_filling_RAG_outputs_for_Evaluation.ipynb b/tools/evaluation/02_filling_RAG_outputs_for_Evaluation.ipynb
index b82497b87..0956471c8 100644
--- a/tools/evaluation/02_filling_RAG_outputs_for_Evaluation.ipynb
+++ b/tools/evaluation/02_filling_RAG_outputs_for_Evaluation.ipynb
@@ -327,7 +327,7 @@
 "vector_store = MilvusVectorStore(uri=\"http://milvus:19530\",\n",
 "    dim=1024,\n",
 "    collection_name=\"document_store_ivfflat\",\n",
-"    index_config={\"index_type\": \"IVF_FLAT\", \"nlist\": 64},\n",
+"    index_config={\"index_type\": \"GPU_IVF_FLAT\", \"nlist\": 64},\n",
 "    search_config={\"nprobe\": 16},\n",
 "    overwrite=False\n",
 ")\n",
diff --git a/tools/evaluation/04_Human_Like_RAG_Evaluation-AIP.ipynb b/tools/evaluation/04_Human_Like_RAG_Evaluation-AIP.ipynb
index fb310d08e..df0c77b89 100644
--- a/tools/evaluation/04_Human_Like_RAG_Evaluation-AIP.ipynb
+++ b/tools/evaluation/04_Human_Like_RAG_Evaluation-AIP.ipynb
@@ -158,7 +158,7 @@
 " \"[The End of Assistant's Answer]\"\n",
 " '\"Rating\": 1, \"Explanation\": \"The answer is not helpful or relevant. It does not answer the question and instead goes off topic.\"'\n",
 " \"\"\n",
-" \"Following the exact same format as above, what is the rating and explanation for the following assistant's answer\"\n",
+" \"Follow the exact same format as above. Put Rating first and Explanation second. Rating must be between 1 and 5. What is the rating and explanation for the following assistant's answer\"\n",
 " \"[Question]\"\n",
 " \"{question}\"\n",
 " \"[The Start of the Reference Context]\"\n",
@@ -221,6 +221,7 @@
 "\n",
 "    response_body = response.json()\n",
 "    llama_judge_responses.append(response_body['choices'][0]['message']['content'])\n",
+"    print(f\"progress: {len(llama_judge_responses)}/{len(data)}\", end='\\r')\n",
 "    except Exception as e:\n",
 "        print(\"Exception:\", e)\n",
 "        llama_judge_responses.append(None)\n"
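
Note on the judge-prompt change above: constraining the judge to emit "Rating" before "Explanation", with the rating between 1 and 5, makes the collected llama_judge_responses straightforward to parse downstream. The following is a minimal sketch, not part of this diff, assuming responses follow that shape; the helper name parse_rating and the sample strings are illustrative only.

import re
from typing import Optional

def parse_rating(judge_response: Optional[str]) -> Optional[int]:
    # Pull the 1-5 rating out of a response like '"Rating": 4, "Explanation": "..."'.
    # Returns None when the response is missing or no valid rating is present,
    # mirroring how failed requests are appended as None in the notebook loop.
    if not judge_response:
        return None
    match = re.search(r'"?Rating"?\s*[:=]\s*([1-5])\b', judge_response)
    return int(match.group(1)) if match else None

# Illustrative usage with made-up responses (None mimics a failed request).
examples = [
    '"Rating": 4, "Explanation": "The answer is grounded in the reference context."',
    None,
]
print([parse_rating(r) for r in examples])  # -> [4, None]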