GGUF usage code
from llama_cpp import Llama

def load_model(repo_id, filename, chat_format='llama-3'):
    # Download the GGUF file from the Hugging Face Hub and load it.
    model = Llama.from_pretrained(
        # model_path="./Phi-3-mini-4k-instruct-q4.gguf",  # when using a file that was already downloaded
        repo_id=repo_id,
        filename=filename,
        n_gpu_layers=-1,      # offload all layers to the GPU
        chat_format=chat_format,
        n_ctx=4000,           # context window size
    )
    return model
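The commented-out model_path line points at the other loading path: once the .gguf file is already on disk, it can be handed to the Llama constructor directly instead of going through from_pretrained. A minimal sketch, assuming a local file path like the one in the comment above (the helper name is just for illustration):

from llama_cpp import Llama

def load_local_model(model_path, chat_format='llama-3'):
    # Load a GGUF file that is already on disk, without touching the Hub.
    return Llama(
        model_path=model_path,   # e.g. "./Phi-3-mini-4k-instruct-q4.gguf"
        n_gpu_layers=-1,
        chat_format=chat_format,
        n_ctx=4000,
    )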
def generate_chat_completion(user_content, system_content, model, max_tokens=128):
    # Build an OpenAI-style message list: system prompt first, then the user turn.
    messages = [
        {"role": "system", "content": system_content},
        # {"role": "user", "content": "hello"},
        # {"role": "assistant", "content": "Hello! How can I assist you today?"},
        {"role": "user", "content": user_content},
    ]
    output = model.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )
    # The response follows the OpenAI chat-completion schema.
    return output['choices'][0]['message']['content']
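For longer answers it can be nicer to stream tokens as they are generated. create_chat_completion accepts stream=True, in which case it returns an iterator of partial chunks in the OpenAI chunk format rather than a single dict. A rough sketch along the same lines as the function above (the helper name is made up for illustration):

def stream_chat_completion(user_content, system_content, model, max_tokens=128):
    # With stream=True the call yields chunks instead of one complete response.
    stream = model.create_chat_completion(
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ],
        max_tokens=max_tokens,
        stream=True,
    )
    for chunk in stream:
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:   # the first and last chunks may carry no text
            print(delta['content'], end='', flush=True)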
When you need to find the right chat_format: the format names that llama-cpp-python accepts (for example 'llama-2', 'llama-3', 'chatml') are the ones registered in llama_cpp/llama_chat_format.py. If chat_format is left as None, the library instead tries to use the chat template stored in the GGUF metadata.
# Usage
model = load_model(repo_id, filename)   # repo_id and filename must point to a GGUF file on the Hub
system_content = "You are a helpful assistant."
generate_chat_completion('안녕', system_content, model)
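Putting it together, one plausible pair of values, guessed from the Phi-3 filename in the comment above, is the microsoft/Phi-3-mini-4k-instruct-gguf repo; treat the exact ids below as assumptions and substitute whatever model you actually use:

# Assumed example values -- swap in any GGUF repo/filename you actually use.
repo_id = "microsoft/Phi-3-mini-4k-instruct-gguf"
filename = "Phi-3-mini-4k-instruct-q4.gguf"

# chat_format=None lets llama-cpp-python try the chat template embedded in the GGUF
# metadata (assumption: this file ships one), since the 'llama-3' default would not
# match a Phi-3 model.
model = load_model(repo_id, filename, chat_format=None)
system_content = "You are a helpful assistant."
print(generate_chat_completion('안녕', system_content, model))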