# compile the project (parallel build across all available cores)
make -j
# obtain the StarCoder model weights and place them in ./models
# NOTE(review): the HF repo holds multi-GB weight files — presumably requires git-lfs; confirm
git clone https://huggingface.co/bigcode/starcoder
mv starcoder ./models
# sanity-check that the weights landed in the expected location
ls ./models/starcoder
# install Python environment and dependencies
# (conda activate assumes an interactive shell with the conda hook initialized)
conda create -n llama_cpp python=3.10
conda activate llama_cpp
pip install -r requirements.txt
# convert the StarCoder model to ggml FP16 format
# (writes ./models/starcoder/ggml-model-f16.gguf, consumed by the quantize step below)
python convert-hf-to-gguf.py models/starcoder/
# quantize the model to 4-bits (using q4_0 method)
./quantize ./models/starcoder/ggml-model-f16.gguf ./models/starcoder/ggml-model-q4_0.gguf q4_0
# run the inference
# -e        process escape sequences (\n) in the prompt string
# -t 4      use 4 threads
# --temp -1 negative temperature — NOTE(review): presumably selects greedy/deterministic
#           sampling in this fork; confirm against the sampling code
# -n 128    generate at most 128 tokens
./main -m ./models/starcoder/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -t 4 --temp -1 -n 128
forked from ggml-org/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Port of Facebook's LLaMA model in C/C++
License
RaymondWang0/llama.cpp
Folders and files
| Name | Last commit message | Last commit date |
|---|---|---|
Repository files navigation
About
Port of Facebook's LLaMA model in C/C++
Resources
License
Stars
Watchers
Forks
Releases
No releases published
Packages 0
No packages published
Languages
- C 45.3%
- C++ 28.5%
- Cuda 10.0%
- Python 5.9%
- Metal 3.3%
- Objective-C 2.8%
- Other 4.2%