Update README.md: lower llama build --max_input_len from 3500 to 3000

kedarpotdar-nv · web-flow · commit 5ed72c0a7906 · 2023-11-05T17:44:34.000-08:00
diff --git a/README.md b/README.md
@@ -89,7 +89,7 @@ git clone https://github.com/NVIDIA/TensorRT-LLM.git
 
 Navigate to the examples\llama directory and run the following script:
 ```
-python build.py --model_dir <path to llama13_chat model> --quant_ckpt_path <path to model.pt> --dtype float16 --use_gpt_attention_plugin float16 --use_gemm_plugin float16 --use_weight_only --weight_only_precision int4_awq --per_group --enable_context_fmha --max_batch_size 1 --max_input_len 3500 --max_output_len 1024 --output_dir <TRT engine folder>
+python build.py --model_dir <path to llama13_chat model> --quant_ckpt_path <path to model.pt> --dtype float16 --use_gpt_attention_plugin float16 --use_gemm_plugin float16 --use_weight_only --weight_only_precision int4_awq --per_group --enable_context_fmha --max_batch_size 1 --max_input_len 3000 --max_output_len 1024 --output_dir <TRT engine folder>
 ```
 
 ## Adding your own data