cookbook

gguf 사용 코드

coconut0115 2024. 9. 2. 16:12
from llama_cpp import Llama
def load_model(repo_id, filename, chat_format='llama-3', n_ctx=4000, n_gpu_layers=-1):
    """Download (or reuse a cached) GGUF model from the Hugging Face Hub and load it.

    Args:
        repo_id: Hugging Face repository id that hosts the GGUF file.
        filename: name (or glob pattern) of the ``.gguf`` file inside the repo.
        chat_format: chat template name understood by llama-cpp-python.
        n_ctx: context window size in tokens.
        n_gpu_layers: number of layers to offload to the GPU (-1 = all layers).

    Returns:
        A ready-to-use ``Llama`` instance.
    """
    model = Llama.from_pretrained(
        # To load an already-downloaded local file instead, use:
        # model_path="./Phi-3-mini-4k-instruct-q4.gguf"
        repo_id=repo_id,
        filename=filename,
        n_gpu_layers=n_gpu_layers,
        chat_format=chat_format,
        n_ctx=n_ctx,
    )
    return model
def generate_chat_completion(user_content, system_content=None, model=None, max_tokens=128):
    """Run one chat turn against the loaded model and return the reply text.

    The original defaults (``system_content=system_content, model=model``)
    raised ``NameError``: Python evaluates default values at function
    *definition* time, and those module-level names are only assigned later
    in this file.  ``None`` sentinels with a late lookup preserve the intended
    "use the module-level objects" behavior while keeping the file runnable
    top to bottom.

    Args:
        user_content: the user's message.
        system_content: system prompt; falls back to the module-level
            ``system_content`` (or a generic prompt) when ``None``.
        model: a loaded ``Llama`` instance; falls back to the module-level
            ``model`` when ``None``.
        max_tokens: maximum number of tokens to generate.

    Returns:
        The assistant message text of the first completion choice.

    Raises:
        RuntimeError: if no model was supplied and none is loaded module-wide.
    """
    if system_content is None:
        # Late lookup so the module-level assignment (made after this def) is seen.
        system_content = globals().get("system_content", "You are a helpful assistant.")
    if model is None:
        model = globals().get("model")
        if model is None:
            raise RuntimeError("No model available: call load_model() first or pass model=...")

    messages = [
        {"role": "system", "content": system_content},
        # Few-shot turns could be inserted here, e.g.:
        # {"role": "user", "content": "hello"},
        # {"role": "assistant", "content": "Hello! How can I assist you today?"},
        {"role": "user", "content": user_content},
    ]

    output = model.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
    )

    return output['choices'][0]['message']['content']
 
 
 

 

# Usage example: download a small GGUF model and run a single chat turn.
# NOTE(review): repo_id/filename were undefined in the original script; the
# values below match the Phi-3 file mentioned in load_model's comment — confirm.
if __name__ == "__main__":
    repo_id = "microsoft/Phi-3-mini-4k-instruct-gguf"
    filename = "Phi-3-mini-4k-instruct-q4.gguf"
    model = load_model(repo_id, filename)
    system_content = "You are a helpful assistant."
    # Pass dependencies explicitly and print the reply (the original discarded it).
    print(generate_chat_completion('안녕', system_content=system_content, model=model))