This repository has been archived by the owner on May 12, 2023. It is now read-only.

Commit

Upgraded the code to support GPT4All requirements
ParisNeo committed Apr 26, 2023
1 parent a3b895e commit 0f1f0ad
Showing 3 changed files with 162 additions and 7 deletions.
126 changes: 126 additions & 0 deletions examples/backend_test.py
@@ -0,0 +1,126 @@
######
# Project : GPT4ALL-UI
# File : backend_test.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is an example of a pygpt4all-ui binding for llamacpp and gpt-j models
# Tests generation

# To call :
# python backend_test.py -m <model path> --prompt <your prompt> --trigger_stop_after <number of tokens after which to trigger a stop, to test how the backend handles stopping generation>
######
from pathlib import Path
from typing import Callable
from pyllamacpp.model import Model
import argparse
import sys

__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"

backend_name = "LLAMACPP"

class LLAMACPP():
    file_extension = '*.bin'
    def __init__(self, model_path, config: dict) -> None:
        """Builds a LLAMACPP backend

        Args:
            model_path (str): Path to the ggml model file
            config (dict): The configuration dictionary
        """
        self.config = config

        self.model = Model(
            ggml_model=model_path,
            n_ctx=self.config['ctx_size'],
            seed=self.config['seed'],
        )

    def get_num_tokens(self, prompt):
        return self.model.num_tokens(prompt)

    def generate(self,
                 prompt: str,
                 n_predict: int = 128,
                 new_text_callback: Callable[[str], bool] = bool,
                 verbose: bool = False,
                 **gpt_params):
        """Generates text out of a prompt

        Args:
            prompt (str): The prompt to use for generation
            n_predict (int, optional): Number of tokens to predict. Defaults to 128.
            new_text_callback (Callable[[str], bool], optional): A callback called every time a new text chunk is generated; return False to stop generation. Defaults to bool (always continue).
            verbose (bool, optional): If true, the code will print detailed information about the generation process. Defaults to False.

        Returns:
            str: The generated text
        """
        try:
            output = self.model.generate(
                prompt,
                new_text_callback=new_text_callback,
                n_predict=n_predict,
                temp=self.config['temp'],
                top_k=self.config['top_k'],
                top_p=self.config['top_p'],
                repeat_penalty=self.config['repeat_penalty'],
                repeat_last_n=self.config['repeat_last_n'],
                n_threads=self.config['n_threads'],
                verbose=verbose
            )
        except Exception as ex:
            print(ex)
            output = ""
        return output

if __name__ == "__main__":
    # create an ArgumentParser object
    parser = argparse.ArgumentParser()

    # add the -m or --model_path argument (./models/llama_cpp/ is the gpt4all-ui default structure)
    parser.add_argument("-m", "--model_path", default="./models/llama_cpp/", help="path to the model file")
    parser.add_argument('--temp', type=float, default=0.5)
    parser.add_argument('--top_k', type=int, default=40)
    parser.add_argument('--top_p', type=float, default=0.95)
    parser.add_argument('--repeat_penalty', type=float, default=1.3)
    parser.add_argument('--repeat_last_n', type=int, default=5)
    parser.add_argument('--n_threads', type=int, default=8)
    parser.add_argument('--ctx_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=-1)
    parser.add_argument('--prompt', type=str, default='Once upon a time')
    parser.add_argument('--trigger_stop_after', type=int, default=-1)
    # parse the arguments
    args = parser.parse_args()
    config = {
        'temp': args.temp,
        'top_k': args.top_k,
        'top_p': args.top_p,
        'repeat_penalty': args.repeat_penalty,
        'repeat_last_n': args.repeat_last_n,
        'n_threads': args.n_threads,
        'ctx_size': args.ctx_size,
        'seed': args.seed
    }

    backend = LLAMACPP(args.model_path, config)

    # Not great to use a module-level counter, but this is a quick example so never mind
    counter = 0

    def callback(text):
        global counter
        print(text, end="")
        sys.stdout.flush()
        # test stopping generation after a number of tokens
        counter += 1
        if args.trigger_stop_after > 0:
            if counter >= args.trigger_stop_after:
                return False

        return True

    num_tokens = backend.get_num_tokens(args.prompt)
    print(f"Prompt has {num_tokens} tokens")
    output_text = backend.generate(args.prompt, new_text_callback=callback)
    print(f"Text : {output_text}")
20 changes: 16 additions & 4 deletions pyllamacpp/pyllamacpp/model.py
@@ -75,13 +75,16 @@ def _set_params(params, kwargs: dict) -> None:
        for param in kwargs:
            setattr(params, param, kwargs[param])

    def _call_new_text_callback(self, text) -> None:
    def _call_new_text_callback(self, text) -> bool:
        """
        Internal new_segment_callback, it just calls the user's callback with the generated text
        :return: None
        :return: bool (continue generation?)
        """
        # the callback returns either a boolean or None
        if Model._new_text_callback is not None:
            Model._new_text_callback(text)
            continue_gen = Model._new_text_callback(text)
            if not (continue_gen is None or continue_gen == True):
                self._ctx.continue_gen = False
        # save res
        self.res += text

@@ -90,9 +93,18 @@ def _call_grab_text_callback(self) -> str:
            return Model._grab_text_callback()
        return None

    def num_tokens(self, prompt: str):
        """
        Computes the number of tokens in the prompt text
        :param prompt: the prompt
        :return: the number of tokens in the prompt
        """
        return pp.llama_get_nb_tokens(self._ctx, prompt)

    def generate(self, prompt: str,
                 n_predict: int = 128,
                 new_text_callback: Callable[[str], None] = None,
                 new_text_callback: Callable[[str], None] = bool,
                 grab_text_callback: Callable[[], str] = None,
                 verbose: bool = False,
                 **gpt_params) -> str:
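With these changes, a callback passed to Model.generate can stop generation early by returning False (returning True or None keeps generating), and num_tokens exposes the prompt's token count. A minimal sketch of this contract, assuming pyllamacpp is installed with these changes and a local ggml model file exists (the path below is hypothetical):

from pyllamacpp.model import Model

model = Model(ggml_model="./models/llama_cpp/model.bin", n_ctx=512, seed=-1)  # hypothetical path
print("prompt tokens:", model.num_tokens("Hello there"))

def first_sentence_only(text):
    print(text, end="", flush=True)
    # Stop as soon as a period is emitted; returning None (or True) keeps generating.
    return False if "." in text else None

model.generate("Hello there", n_predict=64, new_text_callback=first_sentence_only)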
23 changes: 20 additions & 3 deletions pyllamacpp/src/main.cpp
@@ -33,6 +33,7 @@ static bool is_interacting = false;
py::function py_llama_progress_callback;

struct llama_context_wrapper {
    bool continue_gen = true; // Continue text generation
    llama_context* ptr;
};

@@ -165,10 +166,21 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
return "The";
}

// Returns the number of tokens in the prompt
// Needed by the front end to optimize data size
int llama_get_nb_tokens(struct llama_context_wrapper * ctx_w, std::string prompt){
    // tokenize the prompt
    auto embd_inp = ::llama_tokenize_wrapper(ctx_w, prompt, true);
    return embd_inp.size();
}

// quick and dirty implementation! just copied from main.cpp with some minor changes
// Needs lots of improvements
int llama_generate(struct llama_context_wrapper * ctx_w, gpt_params params, py::function new_text_callback, py::function grab_text_callback, bool verbose){

    // Set continue_gen to true
    ctx_w->continue_gen = true;

    if (params.perplexity) {
        printf("\n************\n");
        printf("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__);
@@ -447,7 +459,12 @@ int llama_generate(struct llama_context_wrapper * ctx_w, gpt_params params, py::
        if (!input_noecho) {
            for (auto id : embd) {
                // printf("%s", llama_token_to_str(ctx, id));
                // If the host wants to stop generation, we should stop
                new_text_callback(llama_token_to_str(ctx, id));
                if(!ctx_w->continue_gen){
                    llama_print_timings(ctx);
                    return 0;
                }
            }
            fflush(stdout);
        }
@@ -606,7 +623,8 @@ PYBIND11_MODULE(_pyllamacpp, m) {
.def_readwrite("verbose_prompt", &gpt_params::verbose_prompt)
.def_readwrite("antiprompt", &gpt_params::antiprompt);

py::class_<llama_context_wrapper>(m,"llama_context");
py::class_<llama_context_wrapper>(m,"llama_context")
.def_readwrite("continue_gen", &llama_context_wrapper::continue_gen);

py::class_<llama_token_data>(m,"llama_token_data")
.def(py::init<>())
@@ -657,10 +675,9 @@ PYBIND11_MODULE(_pyllamacpp, m) {

m.def("llama_print_system_info", &llama_print_system_info);

m.def("llama_get_nb_tokens", &llama_get_nb_tokens);
m.def("llama_generate", &llama_generate);



#ifdef VERSION_INFO
m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
#else
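Since the llama_context binding now exposes continue_gen, Python code can check after a run whether generation completed on its own or was cut short by the callback. A rough sketch, assuming a local ggml model (the path is hypothetical, and _ctx is an internal attribute of pyllamacpp.Model, so this is for illustration only):

from pyllamacpp.model import Model

model = Model(ggml_model="./models/llama_cpp/model.bin", n_ctx=512, seed=-1)  # hypothetical path

# A callback that returns False immediately, forcing an early stop.
model.generate("Count to ten:", n_predict=32, new_text_callback=lambda text: False)

# continue_gen is reset to true at the start of llama_generate and cleared by the
# Python wrapper when the callback returns False, so it records how the run ended.
print("stopped early" if not model._ctx.continue_gen else "ran to completion")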
