|
250 | 250 | "source": [
|
251 | 251 | "%%writefile code_llama2_13b_fp16/serving.properties\n",
|
252 | 252 | "engine = MPI\n",
|
253 |
| - "option.tensor_parallel_degree = 4\n", |
| 253 | + "option.tensor_parallel_degree = max\n", |
254 | 254 | "option.rolling_batch = auto\n",
|
255 |
| - "option.max_rolling_batch_size = 8\n", |
| 255 | + "option.max_rolling_batch_size = 32\n", |
256 | 256 | "option.model_loading_timeout = 3600\n",
|
257 | 257 | "option.model_id = {{model_id}}\n",
|
258 |
| - "option.paged_attention = true\n", |
259 | 258 | "option.trust_remote_code = true\n",
|
260 |
| - "option.dtype = fp16\n", |
261 |
| - "option.enable_streaming=True" |
| 259 | + "option.dtype = fp16" |
262 | 260 | ]
|
263 | 261 | },
|
264 | 262 | {
|
|
296 | 294 | "outputs": [],
|
297 | 295 | "source": [
|
298 | 296 | "inference_image_uri = image_uris.retrieve(\n",
|
299 |
| - " framework=\"djl-deepspeed\", region=region, version=\"0.23.0\"\n", |
| 297 | + " framework=\"djl-deepspeed\", region=region, version=\"0.26.0\"\n", |
300 | 298 | ")\n",
|
301 | 299 | "print(f\"Image going to be used is ---- > {inference_image_uri}\")"
|
302 | 300 | ]
|
|
1134 | 1132 | ],
|
1135 | 1133 | "instance_type": "ml.t3.medium",
|
1136 | 1134 | "kernelspec": {
|
1137 |
| - "display_name": "Python 3 (PyTorch 2.0.0 Python 3.10 CPU Optimized)", |
| 1135 | + "display_name": "conda_pytorch_p310", |
1138 | 1136 | "language": "python",
|
1139 |
| - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/pytorch-2.0.0-cpu-py310" |
| 1137 | + "name": "conda_pytorch_p310" |
1140 | 1138 | },
|
1141 | 1139 | "language_info": {
|
1142 | 1140 | "codemirror_mode": {
|
|
1148 | 1146 | "name": "python",
|
1149 | 1147 | "nbconvert_exporter": "python",
|
1150 | 1148 | "pygments_lexer": "ipython3",
|
1151 |
| - "version": "3.10.8" |
| 1149 | + "version": "3.10.13" |
1152 | 1150 | }
|
1153 | 1151 | },
|
1154 | 1152 | "nbformat": 4,
|
|
0 commit comments