cookbook

gguf 사용 코드

coconut0115 2024. 9. 2. 16:12
from llama_cpp import Llama
def load_model(repo_id, filename, chat_format='llama-3', n_ctx=4000, n_gpu_layers=-1):
    """Download (or reuse a cached) GGUF model from the Hugging Face Hub and load it.

    Args:
        repo_id: Hugging Face repository id that hosts the GGUF file.
        filename: name (or glob pattern) of the ``.gguf`` file inside the repo.
        chat_format: chat template name understood by llama-cpp-python.
        n_ctx: context window size in tokens.
        n_gpu_layers: number of layers to offload to the GPU (-1 = all layers).

    Returns:
        A ready-to-use ``Llama`` instance.
    """
    model = Llama.from_pretrained(
        # To load an already-downloaded local file instead, use:
        # model_path="./Phi-3-mini-4k-instruct-q4.gguf"
        repo_id=repo_id,
        filename=filename,
        n_gpu_layers=n_gpu_layers,
        chat_format=chat_format,
        n_ctx=n_ctx,
    )
    return model
def generate_chat_completion(user_content, system_content=None, model=None, max_tokens=128):
    """Run one chat turn against the loaded model and return the reply text.

    The original defaults (``system_content=system_content, model=model``)
    raised ``NameError``: Python evaluates default values at function
    *definition* time, and those module-level names are only assigned later
    in this file.  ``None`` sentinels with a late lookup preserve the intended
    "use the module-level objects" behavior while keeping the file runnable
    top to bottom.

    Args:
        user_content: the user's message.
        system_content: system prompt; falls back to the module-level
            ``system_content`` (or a generic prompt) when ``None``.
        model: a loaded ``Llama`` instance; falls back to the module-level
            ``model`` when ``None``.
        max_tokens: maximum number of tokens to generate.

    Returns:
        The assistant message text of the first completion choice.

    Raises:
        RuntimeError: if no model was supplied and none is loaded module-wide.
    """
    if system_content is None:
        # Late lookup so the module-level assignment (made after this def) is seen.
        system_content = globals().get("system_content", "You are a helpful assistant.")
    if model is None:
        model = globals().get("model")
        if model is None:
            raise RuntimeError("No model available: call load_model() first or pass model=...")

    messages = [
        {"role": "system", "content": system_content},
        # Few-shot turns could be inserted here, e.g.:
        # {"role": "user", "content": "hello"},
        # {"role": "assistant", "content": "Hello! How can I assist you today?"},
        {"role": "user", "content": user_content},
    ]

    output = model.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )

    return output['choices'][0]['message']['content']
 
 
 

 

# Usage example: download a small GGUF model and run a single chat turn.
# NOTE(review): repo_id/filename were undefined in the original script; the
# values below match the Phi-3 file mentioned in load_model's comment — confirm.
if __name__ == "__main__":
    repo_id = "microsoft/Phi-3-mini-4k-instruct-gguf"
    filename = "Phi-3-mini-4k-instruct-q4.gguf"
    model = load_model(repo_id, filename)
    system_content = "You are a helpful assistant."
    # Pass dependencies explicitly and print the reply (the original discarded it).
    print(generate_chat_completion('안녕', system_content=system_content, model=model))