diff --git a/.gitignore b/.gitignore
index cbf9a5b3..0864e9d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,6 +77,7 @@ Makefile
 cmake_install.cmake
 splashkit_test
 projects/cmake/Resources
+llama_ext-prefix
 .ninja_deps
 .ninja_log
 build.ninja
@@ -101,6 +102,7 @@ out/lib/
 tools/scripts/nuget-pkg/obj
 tools/scripts/test/obj
+
 ### Debian packaging ###
 tools/scripts/debian/libsplashkit-dev*
 tools/scripts/debian/data.tar.xz
diff --git a/coresdk/external b/coresdk/external
index e089bc3c..d9c7ca08 160000
--- a/coresdk/external
+++ b/coresdk/external
@@ -1 +1 @@
-Subproject commit e089bc3ccbd7ff11027a790be44f6ab6038b5c58
+Subproject commit d9c7ca08ca9dbb0051bf57ceadb1d7a2d0f8d536
diff --git a/coresdk/src/backend/backend_types.h b/coresdk/src/backend/backend_types.h
index 03083a86..2eeead1e 100644
--- a/coresdk/src/backend/backend_types.h
+++ b/coresdk/src/backend/backend_types.h
@@ -64,6 +64,7 @@ namespace splashkit_lib
         ADC_PTR= 0x41444350, //'ADCP';
         MOTOR_DRIVER_PTR = 0x4d444950, //'MDIP';
         SERVO_DRIVER_PTR = 0x53455256, //'SERV';
+        CONVERSATION_PTR = 0x434f4e56, //'CONV';
         NONE_PTR = 0x4e4f4e45 //'NONE';
     };
diff --git a/coresdk/src/backend/genai_driver.cpp b/coresdk/src/backend/genai_driver.cpp
new file mode 100644
index 00000000..c6f3a05b
--- /dev/null
+++ b/coresdk/src/backend/genai_driver.cpp
@@ -0,0 +1,287 @@
+//
+// genai_driver.cpp
+// sk
+//
+// Created by Sean Boettger on 19/12/2025.
+//
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include "genai_driver.h"
+#include "core_driver.h"
+#include "utility_functions.h"
+
+namespace splashkit_lib
+{
+    namespace llamacpp
+    {
+
+        static void llama_log_callback_null(ggml_log_level level, const char * text, void * user_data){/* nothing, avoid unnecessary logging*/}
+
+        void init()
+        {
+            static bool initialized = false;
+            if (!initialized)
+            {
+                llama_log_set(llama_log_callback_null, NULL);
+
+                ggml_backend_load_all();
+
+                // Create custom logger with colouring
+                el::Configurations conf;
+                conf.setToDefault();
+                conf.setGlobally(el::ConfigurationType::Format, "%level -> %msg");
+                conf.setGlobally(el::ConfigurationType::Filename, "logs/splashkit.log");
+
+                // `el::Loggers::addFlag(el::LoggingFlag::ColoredTerminalOutput);` would be better but has global effect
+                conf.set(el::Level::Warning, el::ConfigurationType::Format, "\x1b[33m%level -> %msg\x1b[0m");
+                conf.set(el::Level::Error, el::ConfigurationType::Format, "\x1b[31m%level -> %msg\x1b[0m");
+
+                el::Loggers::reconfigureLogger("GenAI", conf);
+
+                initialized = true;
+            }
+        }
+
+        model create_model(std::string path)
+        {
+            ggml_backend_load_all();
+
+            // initialize the model
+            llama_model_params model_params = llama_model_default_params();
+            model_params.n_gpu_layers = 0; // cpu-only
+
+            llama_model * model = llama_model_load_from_file(path.c_str(), model_params);
+
+            if (model == NULL)
+            {
+                CLOG(ERROR, "GenAI") << "Unable to load language model from " << path << " - it may be corrupted or missing.";
+                return {false};
+            }
+
+            if (llama_model_has_encoder(model))
+            {
+                llama_model_free(model);
+                CLOG(ERROR, "GenAI") << "Unsupported model, requires encoder-decoder support.";
+                return {false};
+            }
+
+            const llama_vocab * vocab = llama_model_get_vocab(model);
+            const char* tmpl = llama_model_chat_template(model, /* name */ nullptr);
+
+            return {
+                true,
+                model,
+                vocab,
+                tmpl
+            };
+        }
+
+        void delete_model(model mdl)
+        {
+            if (!mdl.valid)
+                return;
+
+            if (!mdl.model)
+                return;
+
+            llama_model_free(mdl.model);
+        }
+
+        std::string format_chat(model& mdl, const std::vector<message>& messages, bool add_assistant)
+        {
+            std::vector<llama_chat_message> llama_formatted;
+            std::vector<char> formatted(0);
+
+            llama_formatted.reserve(messages.size());
+
+            for (const message& msg : messages)
+            {
+                llama_formatted.push_back({msg.role.c_str(), msg.content.c_str()});
+            }
+
+            int new_len = llama_chat_apply_template(mdl.tmpl, llama_formatted.data(), llama_formatted.size(), add_assistant, formatted.data(), formatted.size());
+            if (new_len > (int)formatted.size())
+            {
+                formatted.resize(new_len);
+                new_len = llama_chat_apply_template(mdl.tmpl, llama_formatted.data(), llama_formatted.size(), add_assistant, formatted.data(), formatted.size());
+            }
+
+            return std::string(formatted.begin(), formatted.end());
+        }
+
+        llama_tokens tokenize_string(model& mdl, const std::string& prompt, bool is_first)
+        {
+            // get token count
+            // note: returns a negative number, the count of tokens it would have returned if the buffer was large enough
+            const int n_prompt = -llama_tokenize(mdl.vocab, prompt.data(), prompt.size(), NULL, 0, is_first, true);
+
+            // create buffer
+            std::vector<llama_token> prompt_tokens(n_prompt);
+
+            // receive the tokens
+            if (llama_tokenize(mdl.vocab, prompt.data(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), is_first, true) < 0)
+            {
+                CLOG(ERROR, "GenAI") << "Failed to tokenize the prompt.";
+                return {};
+            }
+
+            return prompt_tokens;
+        }
+
+        context start_context(model& mdl, llama_tokens& starting_context, inference_settings settings)
+        {
+            // Create the context
+            llama_context_params ctx_params = llama_context_default_params();
+            ctx_params.n_ctx = starting_context.size() + settings.max_length - 1;
+            ctx_params.n_batch = ctx_params.n_ctx;
+            ctx_params.no_perf = true;
+
+            llama_context * ctx = llama_init_from_model(mdl.model, ctx_params);
+
+            if (ctx == NULL)
+            {
+                CLOG(ERROR, "GenAI") << "Failed to create the language model context.";
+                return {nullptr};
+            }
+
+            // Create the sampler
+            auto sparams = llama_sampler_chain_default_params();
+            sparams.no_perf = true;
+            llama_sampler * smpl = llama_sampler_chain_init(sparams);
+
+            // Set up sampler
+            llama_sampler_chain_add(smpl, llama_sampler_init_min_p(settings.min_p, 1));
+            llama_sampler_chain_add(smpl, llama_sampler_init_temp(settings.temperature));
+            llama_sampler_chain_add(smpl, llama_sampler_init_top_k(settings.top_k));
+            llama_sampler_chain_add(smpl, llama_sampler_init_top_p(settings.top_p, 0));
+            if (settings.presence_penalty > 0)
+                llama_sampler_chain_add(smpl, llama_sampler_init_penalties(64, 0, 0, settings.presence_penalty));
+            llama_sampler_chain_add(smpl, llama_sampler_init_dist(settings.seed));
+
+            // Prepare batch for starting context
+            llama_tokens next_batch = starting_context;
+
+            // Cache newline token - we use this manually in some spots
+            llama_token newline_token;
+            llama_tokenize(mdl.vocab, "\n", 1, &newline_token, 1, false, true);
+
+            return
+            {
+                ctx,
+                smpl,
+                next_batch,
+                (int)ctx_params.n_ctx,
+                mdl.vocab,
+                newline_token,
+                0,
+                {},
+                false
+            };
+        }
+
+        int context_step(context& ctx, token_result* token)
+        {
+            const string THINKING_START = "<think>";
+            const string THINKING_END = "</think>";
+
+            if (!ctx.ctx)
+                return -1;
+
+            llama_batch batch = llama_batch_get_one(ctx.next_batch.data(), ctx.next_batch.size());
+            // Decode current batch with the model
+            if (llama_decode(ctx.ctx, batch))
+            {
+                CLOG(ERROR, "GenAI") << "Failed to process response from language model.";
+                if (token)
+                    token->type = token_result::NONE;
+                return -1;
+            }
+
+            ctx.total_context.insert(ctx.total_context.end(), ctx.next_batch.begin(), ctx.next_batch.end());
+            ctx.n_pos += batch.n_tokens;
+
+            //
Sample next token + llama_token new_token_id = llama_sampler_sample(ctx.smpl, ctx.ctx, -1); + + // Has the model finished its response? + if (llama_vocab_is_eog(ctx.vocab, new_token_id)) + { + if (token) + token->type = token_result::NONE; + return 1; + } + + char buf[128]; + int n = llama_token_to_piece(ctx.vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) + { + CLOG(ERROR, "GenAI") << "Failed to convert response token from language model."; + return -1; + } + + std::string s(buf, n); + + if (token) + { + bool is_meta = s == THINKING_START || s == THINKING_END; + token->text = s; + if (is_meta) + token->type = token_result::META; + else if (ctx.in_thinking) + token->type = token_result::THINKING; + else + token->type = token_result::CONTENT; + } + + if (s == THINKING_START) + ctx.in_thinking = true; + else if (s == THINKING_END) + ctx.in_thinking = false; + + // prepare the next batch with the sampled token + ctx.next_batch = {new_token_id}; + + // Have we reached the end of the context? + // If so, stop now. + if (ctx.n_pos + ctx.next_batch.size() >= ctx.ctx_size) + return 1; + + return 0; + } + + void add_to_context(context& ctx, llama_tokens& message) + { + ctx.next_batch.insert(ctx.next_batch.end(), message.begin(), message.end()); + } + + void manual_end_message(context& ctx) + { + ctx.next_batch.push_back(llama_vocab_eot(ctx.vocab)); + ctx.next_batch.push_back(ctx.newline_token); + } + + void delete_context(context& ctx) + { + if (ctx.smpl) + llama_sampler_free(ctx.smpl); + + if (ctx.ctx) + llama_free(ctx.ctx); + } + + void __print_debug_context(context& ctx) + { + for (auto& x : ctx.total_context) + { + char buf[128]; + int n = llama_token_to_piece(ctx.vocab, x, buf, sizeof(buf), 0, true); + + std::string s(buf, n); + std::cout << "|" << s; + } + std::cout << std::endl; + } + } +} diff --git a/coresdk/src/backend/genai_driver.h b/coresdk/src/backend/genai_driver.h new file mode 100644 index 00000000..b24c3e91 --- /dev/null +++ b/coresdk/src/backend/genai_driver.h @@ -0,0 +1,110 @@ +// +// genai_driver.h +// sk +// +// Created by Sean Boettger on 19/12/2025. 
+//
+
+#ifndef genai_driver_h
+#define genai_driver_h
+
+#include "backend_types.h"
+
+#include "llama.h"
+
+namespace splashkit_lib
+{
+    typedef unsigned int uint;
+
+    namespace llamacpp
+    {
+        typedef std::vector<llama_token> llama_tokens;
+
+        struct model
+        {
+            bool valid;
+            llama_model* model;
+            const llama_vocab* vocab;
+            const char* tmpl;
+        };
+
+        struct inference_settings
+        {
+            double temperature = 0.6;
+            double top_p = 0.95;
+            int top_k = 20;
+            double min_p = 0;
+            double presence_penalty = 0;
+            int max_length = 256;
+            uint32_t seed = 42;
+        };
+
+        struct message
+        {
+            std::string role;
+            std::string content;
+        };
+
+        struct context
+        {
+            llama_context* ctx;
+            llama_sampler* smpl;
+            llama_tokens next_batch;
+            int ctx_size = 0;
+
+            const llama_vocab* vocab;
+            llama_token newline_token;
+
+            int n_pos;
+            llama_tokens total_context;
+
+            bool in_thinking = false;
+        };
+
+        struct token_result
+        {
+            enum token_type {
+                NONE,
+                CONTENT,
+                THINKING,
+                META
+            };
+            string text;
+            token_type type;
+        };
+
+        void init();
+
+        model create_model(std::string path);
+        void delete_model(model mdl);
+
+        std::string format_chat(model& mdl, const std::vector<message>& messages, bool add_assistant);
+        llama_tokens tokenize_string(model& mdl, const std::string& prompt, bool is_first);
+
+        context start_context(model& mdl, llama_tokens& starting_context, inference_settings settings);
+        void delete_context(context& ctx);
+
+        int context_step(context& ctx, token_result* token);
+        void add_to_context(context& ctx, llama_tokens& message);
+        void manual_end_message(context& ctx);
+
+        void __print_debug_context(context& ctx);
+    }
+
+    struct sk_conversation
+    {
+        pointer_identifier id;
+
+        llamacpp::model model;
+        llamacpp::context context;
+
+        bool was_generating;
+        bool is_generating;
+
+        string prompt_append;
+
+        llamacpp::token_result next_token;
+    };
+}
+
+#endif /* genai_driver_h */
diff --git a/coresdk/src/backend/utility_functions.cpp b/coresdk/src/backend/utility_functions.cpp
index 0e4cd5cd..6985c168 100644
--- a/coresdk/src/backend/utility_functions.cpp
+++ b/coresdk/src/backend/utility_functions.cpp
@@ -73,6 +73,10 @@ namespace splashkit_lib
     string path_to_user_home()
     {
 #ifndef WINDOWS
+        string home = get_env_var("HOME");
+        if (home != "")
+            return home;
+
         struct passwd *pw = getpwuid(getuid());
         return string(pw->pw_dir);
 #else
diff --git a/coresdk/src/backend/web_driver.cpp b/coresdk/src/backend/web_driver.cpp
index f388e083..2305e91b 100644
--- a/coresdk/src/backend/web_driver.cpp
+++ b/coresdk/src/backend/web_driver.cpp
@@ -206,6 +206,67 @@ namespace splashkit_lib
         return _create_response(curl_handle, res, data_read);
     }
 
+    struct _sk_http_get_file_callback_data
+    {
+        void (*user_callback)(unsigned long, unsigned long);
+        int resuming_from;
+    };
+
+    int _sk_http_get_file_callback(_sk_http_get_file_callback_data* data, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow)
+    {
+        data->user_callback(dltotal == 0 ?
0 : (data->resuming_from + dltotal), data->resuming_from + dlnow); + return 0; + } + + sk_http_response *sk_http_get_file(const string &filename, const string &host, unsigned short port, void (*user_callback)(unsigned long, unsigned long)) + { + const string temp_extension = ".temp"; + string temp_filename = filename+temp_extension; + + FILE *file = fopen(temp_filename.c_str(), "ab+"); + + // find resume point + fseek(file, 0L, SEEK_END); + curl_off_t resume_from = ftell(file); + + // init the curl session + CURL *curl_handle = curl_easy_init(); + CURLcode res; + + _init_curl(curl_handle, host, port); + + curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data); + curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, file); + + _sk_http_get_file_callback_data callback_data; + if (user_callback) + { + curl_easy_setopt(curl_handle, CURLOPT_XFERINFOFUNCTION, _sk_http_get_file_callback); + curl_easy_setopt(curl_handle, CURLOPT_XFERINFODATA, &callback_data); + curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0); + + callback_data.user_callback = user_callback; + callback_data.resuming_from = resume_from; + } + + curl_easy_setopt(curl_handle, CURLOPT_RESUME_FROM_LARGE, resume_from); + + // get it! + res = curl_easy_perform(curl_handle); + + fclose(file); + + // try renaming the temp file if the download was okay - rename returns 0 on success + if (res == CURLE_OK && rename(temp_filename.c_str(), filename.c_str())) + { + LOG(WARNING) << "Failed to rename temporary download file " << temp_filename << " to " << filename; + return nullptr; + } + + request_stream data_read = { nullptr, 0 }; + return _create_response(curl_handle, res, data_read); + } + sk_http_response *sk_http_put(const string &host, unsigned short port, const string &body) { request_stream data_read = { nullptr, 0 }; diff --git a/coresdk/src/backend/web_driver.h b/coresdk/src/backend/web_driver.h index e5e54de2..f5e4810a 100644 --- a/coresdk/src/backend/web_driver.h +++ b/coresdk/src/backend/web_driver.h @@ -17,6 +17,7 @@ namespace splashkit_lib sk_http_response *sk_http_post(const string &host, unsigned short port, const string &body); sk_http_response *sk_http_get(const string &host, unsigned short port); + sk_http_response *sk_http_get_file(const string &filename, const string &host, unsigned short port, void (*user_callback)(unsigned long, unsigned long)); sk_http_response *sk_http_put(const string &host, unsigned short port, const string &body); sk_http_response *sk_http_delete(const string &host, unsigned short port, const string &body); sk_http_response *sk_http_make_request(const sk_http_request &request); diff --git a/coresdk/src/coresdk/genai.cpp b/coresdk/src/coresdk/genai.cpp new file mode 100644 index 00000000..cde93610 --- /dev/null +++ b/coresdk/src/coresdk/genai.cpp @@ -0,0 +1,499 @@ +// +// genai.cpp +// splashkit +// +// Created by Sean Boettger on 20/12/25. 
+//
+
+#include "genai_driver.h"
+#include "genai.h"
+#include "utility_functions.h"
+#include "web_driver.h"
+#include "terminal.h"
+#include "core_driver.h"
+
+#include <filesystem>
+
+using std::to_string;
+
+namespace splashkit_lib
+{
+    static vector<sk_conversation*> objects;
+
+    const language_model DEFAULT_LANGUAGE_MODEL = QWEN3_0_6B_INSTRUCT;
+
+    const int default_max_tokens_base = 256; // base has a higher likelihood of running forever for no reason, better to limit it early
+    const int default_max_tokens_instruct = 4096;
+    const int default_max_tokens_thinking = 4096;
+
+    extern const std::array<language_model_options, 26> models; // defined at end of file
+
+    /* terminal util functions in lieu of ncurses*/
+    void terminal_erase_left(int count /* -1 for all */)
+    {
+        if (count == 0)
+            return;
+
+        if (count == -1)
+            write("\r\033[K");
+        else
+            write("\033["+to_string(count)+"D\033[K");
+    }
+
+    std::vector<int> terminal_stack;
+
+    void terminal_push(const string &str)
+    {
+        write(str);
+        terminal_stack.push_back(str.size());
+    }
+
+    void terminal_pop()
+    {
+        terminal_erase_left(terminal_stack.back());
+        terminal_stack.pop_back();
+    }
+
+    bool download_with_progress_bar(string filename, string url)
+    {
+        auto callback = [](unsigned long expected_size, unsigned long current_size)
+        {
+            terminal_pop();
+
+            if (expected_size == 0)
+            {
+                terminal_push("");
+                return;
+            }
+
+            static int spinner_index = 0;
+
+            const int progress_bar_length = 10;
+            const string spinner = "|/-\\";
+
+            int expected_mb = expected_size / (1024 * 1024);
+            int current_mb = current_size / (1024 * 1024);
+
+            // construct progress bar
+            int progress_bar_filled = 0;
+            if (expected_size > 0)
+                progress_bar_filled = (int)(progress_bar_length * current_size/(double)expected_size);
+            if (progress_bar_filled > progress_bar_length)
+                progress_bar_filled = progress_bar_length;
+
+            string progress_bar = string(progress_bar_filled, '=') + string(progress_bar_length-progress_bar_filled, ' ');
+            if (progress_bar_filled < progress_bar_length)
+                progress_bar[progress_bar_filled] = spinner[(spinner_index++)/2 % spinner.size()];
+
+            // write message
+            terminal_push(progress_bar + "| (" + to_string(current_mb) + "mb / " + to_string(expected_mb) + "mb)");
+        };
+
+        terminal_push("");
+
+        sk_http_response * resp = sk_http_get_file(filename, url, 443, callback);
+
+        terminal_pop();
+
+        return resp != nullptr && resp->code >= 200 && resp->code < 300;
+    }
+
+    bool ensure_exists_or_download(string path, string url, string message)
+    {
+        if (std::filesystem::exists(path))
+            return true;
+
+        terminal_push(message);
+
+        bool result = download_with_progress_bar(path, url);
+
+        terminal_pop();
+
+        return result;
+    }
+
+    llamacpp::model __get_model(language_model_options options)
+    {
+        llamacpp::init();
+
+        if (options.url != "" && !ensure_exists_or_download(options.path, options.url, " ::: Downloading Language Model: " + options.name + " |"))
+        {
+            CLOG(ERROR, "GenAI") << "Failed to download language model - see error above.";
+            return {false};
+        }
+
+        return llamacpp::create_model(options.path);
+    }
+
+    string __generate_common(string prompt, language_model_options options, bool format_chat)
+    {
+        llamacpp::model model = __get_model(options);
+
+        if (!model.valid) return "";
+
+        std::string formatted = prompt;
+
+        if (format_chat)
+        {
+            formatted = llamacpp::format_chat(model, {
+                {
+                    "user", prompt + options.prompt_append
+                },
+            }, true);
+        }
+        llamacpp::llama_tokens tokens = llamacpp::tokenize_string(model, formatted, true);
+
+        llamacpp::context ctx = llamacpp::start_context(model, tokens, {
+
options.temperature, + options.top_p, + options.top_k, + options.min_p, + options.presence_penalty, + options.max_tokens, + (uint32_t)options.seed + }); + + std::string result = ""; + llamacpp::token_result token; + + while (!llamacpp::context_step(ctx, &token)) + { + if (token.type == llamacpp::token_result::CONTENT) + result += token.text; + }; + + llamacpp::delete_context(ctx); + llamacpp::delete_model(model); + + return result; + } + + + string generate_reply(string prompt) + { + return generate_reply(DEFAULT_LANGUAGE_MODEL, prompt); + } + + string generate_reply(language_model model, string prompt) + { + return generate_reply(prompt, option_language_model(model)); + } + + string generate_reply(string prompt, language_model_options options) + { + return __generate_common(prompt, options, true); + } + + string generate_text(string text) + { + return generate_text(DEFAULT_LANGUAGE_MODEL, text); + } + + string generate_text(language_model model, string text) + { + return generate_text(text, option_language_model(model)); + } + + string generate_text(string text, language_model_options options) + { + return __generate_common(text, options, false); + } + + // -------------------------------------------------------------- + + // Streaming conversation + + #define CONVERSATION_CHECK(x, val) \ + if (INVALID_PTR(c, CONVERSATION_PTR))\ + {\ + LOG(WARNING) << "Passed an invalid conversation object to " x;\ + return val;\ + } + + conversation create_conversation() + { + return create_conversation(option_language_model(DEFAULT_LANGUAGE_MODEL)); + } + + conversation create_conversation(language_model model) + { + return create_conversation(option_language_model(model)); + } + + conversation create_conversation(language_model_options options) + { + internal_sk_init(); + + llamacpp::model model = __get_model(options); + + if (!model.valid) return nullptr; + + llamacpp::llama_tokens initial_tokens = llamacpp::tokenize_string(model, "", true); + + sk_conversation* c = new sk_conversation(); + c->id = CONVERSATION_PTR; + c->model = model; + c->context = llamacpp::start_context(model, initial_tokens, { + options.temperature, + options.top_p, + options.top_k, + options.min_p, + options.presence_penalty, + options.max_tokens, + (uint32_t)options.seed + });; + + c->was_generating = false; + c->is_generating = true; + + c->prompt_append = options.prompt_append; + + objects.push_back(c); + + return c; + }; + + void conversation_add_message(conversation c, const string& message) + { + CONVERSATION_CHECK("conversation_add_message", ) + + // end the language model's turn + if (c->was_generating) + { + c->was_generating = false; + llamacpp::manual_end_message(c->context); + } + + // tokenize user's prompt and add to context + llamacpp::llama_tokens tokens = llamacpp::tokenize_string(c->model, llamacpp::format_chat(c->model, { + {"user", message + c->prompt_append} + }, true), false); + llamacpp::add_to_context(c->context, tokens); + + // the model is ready to generate again + c->is_generating = true; + } + + void __buffer_next_token(conversation c) + { + if (c->next_token.type != llamacpp::token_result::token_type::NONE) + return; // already buffered + + // attempt to get next token that is non-meta + do + { + // if we reach the end of the message, return even if a meta token (shouldn't happen though) + if (llamacpp::context_step(c->context, &c->next_token)) + { + c->is_generating = false; + return; + } + } while (c->next_token.type == llamacpp::token_result::token_type::META); + } + + // These next three 
functions buffer the next token so that they can
+    // return information about it
+    bool conversation_is_replying(conversation c)
+    {
+        CONVERSATION_CHECK("conversation_is_replying", false)
+
+        __buffer_next_token(c);
+
+        return c->is_generating;
+    }
+
+    bool conversation_is_thinking(conversation c)
+    {
+        CONVERSATION_CHECK("conversation_is_thinking", false)
+
+        __buffer_next_token(c);
+
+        return c->next_token.type == llamacpp::token_result::token_type::THINKING;
+    }
+
+    string conversation_get_reply_piece(conversation c)
+    {
+        CONVERSATION_CHECK("conversation_get_reply_piece", "")
+
+        // if the user wants a token, we can resume generating even if we already finished
+        c->is_generating = true;
+        c->was_generating = true;
+
+        __buffer_next_token(c);
+
+        // token is consumed
+        c->next_token.type = llamacpp::token_result::token_type::NONE;
+
+        return c->next_token.text;
+    }
+
+    void __free_conversation_resource(conversation c)
+    {
+        llamacpp::delete_context(c->context);
+        llamacpp::delete_model(c->model);
+    }
+
+    void free_conversation(conversation c)
+    {
+        CONVERSATION_CHECK("free_conversation", )
+
+        __free_conversation_resource(c);
+
+        for (auto it = objects.begin(); it != objects.end(); it++)
+        {
+            if (*it == c)
+            {
+                notify_of_free(c);
+
+                delete *it;
+
+                it = objects.erase(it);
+                return;
+            }
+        }
+    }
+
+    void free_all_conversations()
+    {
+        for (conversation c : objects)
+        {
+            __free_conversation_resource(c);
+        }
+
+        objects.clear();
+    }
+
+    // --------------------------------------------------------------
+
+    language_model_options option_language_model(language_model model)
+    {
+        if (model < 0 || model >= models.size() || models[model].name == "")
+        {
+            model = DEFAULT_LANGUAGE_MODEL;
+            CLOG(WARNING, "GenAI") << "Invalid model selected, defaulting to '" << models[model].name << "'";
+        }
+
+        string home_path = path_from( {path_to_user_home(), ".splashkit", "models"} );
+
+        language_model_options options = models[model];
+        options.path = home_path + options.path;
+        options.seed = 0;
+
+        return options;
+    }
+
+    // --------------------------------------------------------------
+
+
+    // default model definitions
+
+    const std::array<language_model_options, 26> models = {{
+        [0]={}, [1]={}, [2]={}, [3]={},
+
+        [QWEN3_0_6B_BASE] = {
+            "Qwen3 0.6B Base",
+            "https://huggingface.co/mradermacher/Qwen3-0.6B-Base-GGUF/resolve/main/Qwen3-0.6B-Base.Q8_0.gguf?download=true",
+            "Qwen3-0.6B-Base.Q8_0.gguf",
+            default_max_tokens_base, 0.7, 0.8, 20, 0, 1.5
+        },
+        [QWEN3_0_6B_INSTRUCT] = {
+            "Qwen3 0.6B Instruct",
+            "https://huggingface.co/Qwen/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf?download=true",
+            "Qwen3-0.6B-Q8_0.gguf",
+            default_max_tokens_instruct, 0.7, 0.8, 20, 0, 1.5, " /no_think"
+        },
+        [QWEN3_0_6B_THINKING] = {
+            "Qwen3 0.6B Thinking",
+            "https://huggingface.co/Qwen/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf?download=true",
+            "Qwen3-0.6B-Q8_0.gguf",
+            default_max_tokens_thinking, 0.6, 0.95, 20, 0, 1.5
+        },
+
+        [7]={},
+
+        [QWEN3_1_7B_BASE] = {
+            "Qwen3 1.7B Base",
+            "https://huggingface.co/mradermacher/Qwen3-1.7B-Base-GGUF/resolve/main/Qwen3-1.7B-Base.Q8_0.gguf?download=true",
+            "Qwen3-1.7B-Base.Q8_0.gguf",
+            default_max_tokens_base, 0.7, 0.8, 20, 0, 1.5
+        },
+        [QWEN3_1_7B_INSTRUCT] = {
+            "Qwen3 1.7B Instruct",
+            "https://huggingface.co/Qwen/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q8_0.gguf?download=true",
+            "Qwen3-1.7B-Q8_0.gguf",
+            default_max_tokens_instruct, 0.7, 0.8, 20, 0, 1.5, " /no_think"
+        },
+        [QWEN3_1_7B_THINKING] = {
+            "Qwen3 1.7B Thinking",
+
"https://huggingface.co/Qwen/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q8_0.gguf?download=true", + "Qwen3-1.7B-Q8_0.gguf", + default_max_tokens_thinking, 0.6, 0.95, 20, 0, 1.5 + }, + + [11]={}, + + [QWEN3_4B_BASE] = { + "Qwen3 4B Base", + "https://huggingface.co/mradermacher/Qwen3-4B-Base-GGUF/resolve/main/Qwen3-4B-Base.Q2_K.gguf?download=true", + "Qwen3-4B-Base.Q2_K.gguf", + default_max_tokens_base, 0.7, 0.8, 20, 0, 0 + }, + [QWEN3_4B_INSTRUCT] = { + "Qwen3 4B Instruct", + "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/resolve/main/Qwen3-4B-Instruct-2507-UD-Q2_K_XL.gguf?download=true", + "Qwen3-4B-Instruct-2507-UD-Q2_K_XL.gguf", + default_max_tokens_instruct, 0.7, 0.8, 20, 0, 0 + }, + [QWEN3_4B_THINKING] = { + "Qwen3 4B Thinking", + "https://huggingface.co/unsloth/Qwen3-4B-Thinking-2507-GGUF/resolve/main/Qwen3-4B-Thinking-2507-UD-Q2_K_XL.gguf?download=true", + "Qwen3-4B-Thinking-2507-UD-Q2_K_XL.gguf", + default_max_tokens_thinking, 0.6, 0.95, 20, 0, 0 + }, + + [15]={}, + + [GEMMA3_270M_BASE] = { + "Gemma3 270M Base", + "https://huggingface.co/ggml-org/gemma-3-270m-GGUF/resolve/main/gemma-3-270m-Q8_0.gguf?download=true", + "gemma-3-270m-Q8_0.gguf", + default_max_tokens_base, 1.0, 0.95, 64, 0, 0 + }, + [GEMMA3_270M_INSTRUCT] = { + "Gemma3 270M Instruct", + "https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-Q8_0.gguf?download=true", + "gemma-3-270m-it-Q8_0.gguf", + default_max_tokens_instruct, 1.0, 0.95, 64, 0, 0 + }, + + [18]={}, [19]={}, + + [GEMMA3_1B_BASE] = { + "Gemma3 1B Base", + "https://huggingface.co/mradermacher/gemma-3-1b-pt-GGUF/resolve/main/gemma-3-1b-pt.Q8_0.gguf?download=true", + "gemma-3-1b-pt.Q8_0.gguf", + default_max_tokens_base, 1.0, 0.95, 64, 0, 0 + }, + [GEMMA3_1B_INSTRUCT] = { + "Gemma3 1B Instruct", + "https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q8_0.gguf?download=true", + "gemma-3-1b-it-Q8_0.gguf", + default_max_tokens_instruct, 1.0, 0.95, 64, 0, 0 + }, + + [22]={}, [23]={}, + + [GEMMA3_4B_BASE] = { + "Gemma3 4B Base", + "https://huggingface.co/mradermacher/gemma-3-4b-pt-GGUF/resolve/main/gemma-3-4b-pt.Q2_K.gguf?download=true", + "gemma-3-4b-pt.Q2_K.gguf", + default_max_tokens_base, 1.0, 0.95, 64, 0, 0 + }, + [GEMMA3_4B_INSTRUCT] = { + "Gemma3 4B Instruct", + "https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-UD-IQ3_XXS.gguf?download=true", + "gemma-3-4b-it-UD-IQ3_XXS.gguf", + default_max_tokens_instruct, 1.0, 0.95, 64, 0, 0 + } + }}; +} diff --git a/coresdk/src/coresdk/genai.h b/coresdk/src/coresdk/genai.h new file mode 100644 index 00000000..c57f1fa8 --- /dev/null +++ b/coresdk/src/coresdk/genai.h @@ -0,0 +1,258 @@ +/** + * @header genai + * @author Sean Boettger + * @brief SplashKit gives you a simple way to use and embed local AIs in your projects, + * that run on your own computer. + * + * @attribute group generative_ai + * @attribute static generative_ai + */ + +#ifndef genai_hpp +#define genai_hpp + +#include "types.h" + +#include +#include + +using std::string; + +namespace splashkit_lib +{ + /** + * The `conversation` type is used to refer to conversations between the user + * and a language model. You can use it to send messages to the language model, + * and stream responses back. 
+ * + * + * All `conversation` objects are: + * + * + * - created with `create_conversation()`, `create_conversation(language_model model)` or + * `create_conversation(language_model_options options)` + * + * + * - and must be released using `free_conversation()` (to release a specific `conversation` object) + * or `free_all_conversation()` (to release all created `conversation` objects). + * + * + * @attribute class conversation + */ + typedef struct sk_conversation *conversation; + + /** + * @brief Generates a reply to a textual prompt by a language model + * + * The language model will respond to the textual prompt in a chat style format. It will follow instructions and answer questions. + * Instruct or Thinking models are recommended. Base models likely won't output sensible results. + * + * @param prompt The prompt for the language model to reply to. + * + * @returns The generated reply. + */ + string generate_reply(string prompt); + + /** + * @brief Generates a reply to a textual prompt by a language model + * + * The language model will respond to the textual prompt in a chat style format. It will follow instructions and answer questions. + * Instruct or Thinking models are recommended. Base models likely won't output sensible results. + * + * @param model The language model to use + * @param prompt The prompt for the language model to reply to. + * + * @returns The generated reply. + * + * @attribute suffix with_model + */ + string generate_reply(language_model model, string prompt); + + /** + * @brief Generates a reply to a textual prompt by a language model + * + * The language model will respond to the textual prompt in a chat style format. It will follow instructions and answer questions. + * Instruct or Thinking models are recommended. Base models likely won't output sensible results. + * + * @param prompt The prompt for the language model to reply to. + * @param options The generation options - use the `option_` functions to create this, for instance `option_language_model` + * + * @returns The generated reply. + * + * @attribute suffix with_options + */ + string generate_reply(string prompt, language_model_options options); + + + /** + * @brief Generates text that continues from a prompt + * + * The language model will continue predicting text based on patterns in the prompt - it will not directly follow instructions or answer questions. + * Base models are recommended; Instruct and Thinking models may work. + * + * @param text The input text for the language model to continue. + * + * @returns The generated reply. + */ + string generate_text(string text); + + /** + * @brief Generates text that continues from a prompt + * + * The language model will continue predicting text based on patterns in the prompt - it will not directly follow instructions or answer questions. + * Base models are recommended; Instruct and Thinking models may work. + * + * @param model The language model to use + * @param text The input text for the language model to continue. + * + * @returns The generated reply. + * + * @attribute suffix with_model + */ + string generate_text(language_model model, string text); + + /** + * @brief Generates text that continues from a prompt + * + * The language model will continue predicting text based on patterns in the prompt - it will not directly follow instructions or answer questions. + * Base models are recommended; Instruct and Thinking models may work. + * + * @param text The input text for the language model to continue. 
+ * @param options The generation options - use the `option_` functions to create this, for instance `option_language_model` + * + * @returns The generated reply. + * + * @attribute suffix with_options + */ + string generate_text(string text, language_model_options options); + + /** + * @brief Creates a new `conversation` object, that uses the default language model. + * + * The `conversation` object can have messages added to it, and responses streamed back from it via the other Conversation functions and procedures + * + * @returns Returns a new `conversation` object. + * + * @attribute class conversation + * @attribute constructor true + */ + conversation create_conversation(); + + /** + * @brief Creates a new `conversation` object, that uses a chosen language model. + * + * The `conversation` object can have messages added to it, and responses streamed back from it via the other Conversation functions and procedures + * + * @param model The language model to use + * + * @returns Returns a new `conversation` object. + * + * @attribute class conversation + * @attribute constructor true + * + * @attribute suffix with_model + */ + conversation create_conversation(language_model model); + + /** + * @brief Creates a new `conversation` object, that uses a chosen language model among other options. + * + * The `conversation` object can have messages added to it, and responses streamed back from it via the other Conversation functions and procedures + * + * @param options The options to use - use this to choose the language model, and change various parameters. + * + * @returns Returns a new `conversation` object. + * + * @attribute class conversation + * @attribute constructor true + * + * @attribute suffix with_options + */ + conversation create_conversation(language_model_options options); + + /** + * Checks if a language model is currently generating a reply within a `conversation`. + * If so, you can continue to receive the message with `conversation_get_reply_piece(conversation c)` + * + * @param c The `conversation` object to check + * + * @returns Returns whether the language model is still generating a reply + * + * @attribute class conversation + * @attribute method is_replying + * @attribute self c + */ + bool conversation_is_replying(conversation c); + + /** + * Checks if a language model is currently "thinking" while generating a reply within a `conversation`. + * You can use this to filter out the "thoughts" and display them differently (or hide them entirely) + * + * @param c The `conversation` object to check + * + * @returns Returns whether the language model is currently thinking while generating a reply + * + * @attribute class conversation + * @attribute method is_thinking + * @attribute self c + */ + bool conversation_is_thinking(conversation c); + + /** + * Adds a message to a `conversation`, that the language model will begin replying to. 
+     * You can receive the reply one piece at a time by calling `conversation_get_reply_piece(conversation c)` in a loop
+     *
+     * @param c The `conversation` object to check
+     * @param message The user message to add to the conversation - the language model will reply to this
+     *
+     * @attribute class conversation
+     * @attribute method add_message
+     * @attribute self c
+     */
+    void conversation_add_message(conversation c, const string& message);
+
+    /**
+     * Returns a single piece of a reply (generally one word at a time) from the `conversation`
+     * You can use a loop while checking `conversation_is_replying` to retrieve the reply as it generates
+     *
+     * @param c The `conversation` object to receive the reply from
+     *
+     * @returns Returns a small piece of the reply (generally 1 word or less)
+     *
+     * @attribute class conversation
+     * @attribute method get_reply_piece
+     * @attribute self c
+     */
+    string conversation_get_reply_piece(conversation c);
+
+    /**
+     * Frees the resources associated with the `conversation` object.
+     *
+     * @param c The `conversation` object whose resources should be released.
+     *
+     * @attribute class conversation
+     * @attribute destructor true
+     * @attribute self c
+     * @attribute method free
+     */
+    void free_conversation(conversation c);
+
+    /**
+     * Releases all of the `conversation` objects which have been loaded.
+     *
+     * @attribute static conversations
+     * @attribute method free_all
+     */
+    void free_all_conversations();
+
+    /**
+     * Use this option to choose which language model to use, and initialize its default settings
+     *
+     * @param model The language model to use
+     *
+     * @return Language model options that will use that model and its default settings.
+     */
+    language_model_options option_language_model(language_model model);
+
+}
+#endif /* genai_hpp */
diff --git a/coresdk/src/coresdk/types.h b/coresdk/src/coresdk/types.h
index 3573af9f..c46ce9f6 100644
--- a/coresdk/src/coresdk/types.h
+++ b/coresdk/src/coresdk/types.h
@@ -548,5 +548,75 @@ namespace splashkit_lib
         BUBBLE = 4,
         BUBBLE_MULTICOLORED = 5
     };
+
+    /**
+     * Language Models:
+     * Choose between different language models to trade off speed and intelligence
+     * Each model is scaled to fit within 1~2GB and will be automatically downloaded when needed - feel free to try them out!
+     *
+     * @constant QWEN3_0_6B_BASE Qwen3 0.6B Base model - small, extremely fast and good for text completion. Very limited world knowledge.
+     * @constant QWEN3_0_6B_INSTRUCT Qwen3 0.6B Instruct model (default) - small, extremely fast and can follow simple instructions. Very limited world knowledge.
+     * @constant QWEN3_0_6B_THINKING Qwen3 0.6B Thinking model - small, extremely fast and can follow more specific instructions, but has a short delay before starting to reply. Very limited world knowledge.
+     * @constant QWEN3_1_7B_BASE Qwen3 1.7B Base model - decently fast and good for text completion. Limited world knowledge.
+     * @constant QWEN3_1_7B_INSTRUCT Qwen3 1.7B Instruct model - decently fast and can follow instructions. Limited world knowledge.
+     * @constant QWEN3_1_7B_THINKING Qwen3 1.7B Thinking model - decently fast and can follow more difficult instructions, but has a delay before starting to reply. Limited world knowledge.
+     * @constant QWEN3_4B_BASE Qwen3 4B Base model - slower but excellent for text completion/pattern based completion
+     * @constant QWEN3_4B_INSTRUCT Qwen3 4B Instruct model - slower but can follow complex instructions
+     * @constant QWEN3_4B_THINKING Qwen3 4B Thinking model - slower but can follow complex and specific instructions, but has a potentially long delay before starting to reply
+     * @constant GEMMA3_270M_BASE Gemma3 270M Base model - tiny, extremely fast, and good for text completion. Very limited world knowledge.
+     * @constant GEMMA3_270M_INSTRUCT Gemma3 270M Instruct model - tiny, extremely fast, and good for very simple instructions. Very limited world knowledge.
+     * @constant GEMMA3_1B_BASE Gemma3 1B Base model - fast and good for text completion. Has decent world knowledge and multi-lingual abilities.
+     * @constant GEMMA3_1B_INSTRUCT Gemma3 1B Instruct model - fast and can follow instructions. Has decent world knowledge and multi-lingual abilities.
+     * @constant GEMMA3_4B_BASE Gemma3 4B Base model - slower but good for text completion/pattern based completion. Has decent world knowledge and multi-lingual abilities.
+     * @constant GEMMA3_4B_INSTRUCT Gemma3 4B Instruct model - slower but can follow complex instructions. Has decent world knowledge and multi-lingual abilities.
+     */
+    enum language_model
+    {
+        QWEN3_0_6B_BASE = 4,
+        QWEN3_0_6B_INSTRUCT = 5,
+        QWEN3_0_6B_THINKING = 6,
+        QWEN3_1_7B_BASE = 8,
+        QWEN3_1_7B_INSTRUCT = 9,
+        QWEN3_1_7B_THINKING = 10,
+        QWEN3_4B_BASE = 12,
+        QWEN3_4B_INSTRUCT = 13,
+        QWEN3_4B_THINKING = 14,
+        GEMMA3_270M_BASE = 16,
+        GEMMA3_270M_INSTRUCT = 17,
+        GEMMA3_1B_BASE = 20,
+        GEMMA3_1B_INSTRUCT = 21,
+        GEMMA3_4B_BASE = 24,
+        GEMMA3_4B_INSTRUCT = 25,
+    };
+
+    /**
+     * Language model options allow you to customize the language model used. These should be
+     * initialised using functions such as `option_language_model`.
+     *
+     * @field name The name of the model (used in diagnostic messages).
+     * @field url A URL to download a model from.
+     * @field path A path to a custom language model (.gguf) file on your computer/a place to download it to.
+     * @field max_tokens The maximum number of tokens to output when replying. One word is approximately two tokens.
+     * @field temperature Increases the likelihood of unlikely tokens to be chosen.
+     * @field top_p Only choose from the top P most likely tokens.
+     * @field top_k Only choose from the top K most likely tokens.
+     * @field min_p Remove tokens less likely than P.
+     * @field presence_penalty Penalizes words that have been used once, making them less likely. Can reduce repetition.
+     * @field prompt_append A string to append to prompts automatically.
+     * @field seed The seed used when sampling tokens.
+     */
+    struct language_model_options
+    {
+        string name;
+        string url;
+        string path;
+        int max_tokens;
+        double temperature;
+        double top_p;
+        int top_k;
+        double min_p;
+        double presence_penalty;
+        string prompt_append;
+        int seed;
+    };
 }
 #endif /* types_hpp */
diff --git a/coresdk/src/test/test_genai.cpp b/coresdk/src/test/test_genai.cpp
new file mode 100644
index 00000000..669bea23
--- /dev/null
+++ b/coresdk/src/test/test_genai.cpp
@@ -0,0 +1,73 @@
+//
+// test_genai.cpp
+// splashkit
+//
+// Created by Sean Boettger on 20/12/2025.
+// + +#include "genai.h" +#include "terminal.h" +#include "basics.h" +#include "utils.h" +#include +#include + +using namespace std; +using namespace splashkit_lib; + +void run_genai_test() +{ + const string THINKING_STYLE = "\033[37;3m"; + const string RESET_STYLE = "\033[0m"; + + conversation conv = create_conversation(QWEN3_1_7B_THINKING); + + while(true) + { + write("\n> "); + string prompt = read_line(); + + // See if the user wants to exit + string exit = trim(generate_reply(QWEN3_1_7B_INSTRUCT, "User A: "+prompt+"\nDoes user A want to end the conversation? Answer with one word, either CONTINUE or END:")); + + write_line("["+exit+"]"); + + if (exit == "END") + break; + + // otherwise continue the conversation + conversation_add_message(conv, prompt); + + bool thinking = false; + string last_piece = "\n"; + while(conversation_is_replying(conv)) + { + if (conversation_is_thinking(conv) != thinking) + { + thinking = conversation_is_thinking(conv); + + if (thinking) + write(THINKING_STYLE); + else + write(RESET_STYLE); + } + + string piece = conversation_get_reply_piece(conv); + + // avoid double newlines - ideally this will be filtered on SplashKit's side instead + if (piece == "\n" && last_piece == "\n") + continue; + + if (piece == "\n\n") + piece = "\n"; + + write(piece); + last_piece = piece; + } + + if (last_piece != "\n") + write("\n"); + } + + free_conversation(conv); +} diff --git a/coresdk/src/test/test_main.cpp b/coresdk/src/test/test_main.cpp index 0b6e8ab9..e4b7750a 100644 --- a/coresdk/src/test/test_main.cpp +++ b/coresdk/src/test/test_main.cpp @@ -68,6 +68,7 @@ void setup_tests() add_test("GPIO - SPI MAX7219 LED matrix Tests", run_gpio_spi_led_matrix_tests); add_test("GPIO - I2C HT16K33 LED matrix Tests", run_gpio_i2c_led_matrix_tests); add_test("GPIO - I2C HT16K33 LED 14 Segment Tests", run_gpio_i2c_quad_14_seg_test); + add_test("Gen AI", run_genai_test); } int main(int argv, char **args) diff --git a/coresdk/src/test/test_main.h b/coresdk/src/test/test_main.h index 1beddfc8..89f42267 100644 --- a/coresdk/src/test/test_main.h +++ b/coresdk/src/test/test_main.h @@ -44,5 +44,6 @@ void run_gpio_i2c_quad_14_seg_test(); void run_terminal_test(); void run_logging_test(); void run_ui_test(); +void run_genai_test(); #endif /* test_main_h */ diff --git a/projects/cmake/CMakeLists.txt b/projects/cmake/CMakeLists.txt index 0780489a..017e2a27 100644 --- a/projects/cmake/CMakeLists.txt +++ b/projects/cmake/CMakeLists.txt @@ -5,6 +5,7 @@ set(CMAKE_BUILD_TYPE Debug) cmake_policy(SET CMP0083 NEW) include(CheckPIESupported) +include(ExternalProject) check_pie_supported() # SK Directories relative to cmake project @@ -44,6 +45,7 @@ if (APPLE) -framework AudioToolbox \ -framework CoreAudio \ -framework CoreVideo \ + -framework Accelerate \ -lSDL2 \ -lSDL2_mixer \ -lSDL2_ttf \ @@ -245,6 +247,8 @@ include_directories("${SK_EXT}/hash-library") include_directories("${SK_EXT}/json") include_directories("${SK_EXT}/catch") include_directories("${SK_EXT}/microui/src") +include_directories("${SK_EXT}/llama.cpp/include") +include_directories("${SK_EXT}/llama.cpp/ggml/include") # MAC OS DIRECTORY INCLUDES if (APPLE) @@ -257,13 +261,60 @@ if (APPLE) include_directories("${SK_EXT}/SDL_image/external/libpng-1.6.2") endif() +# INCLUDE LLAMA.CPP + +# Included as an external project so that it can be configured +# as Release, independently of the main project. 
+ +# Compiled as CPU only +# TODO: Decide on minimum architecture requirements +ExternalProject_Add( + llama_ext + SOURCE_DIR "${SK_EXT}/llama.cpp" + CMAKE_ARGS + -DLLAMA_BUILD_TESTS=OFF + -DLLAMA_BUILD_TOOLS=OFF + -DLLAMA_BUILD_EXAMPLES=OFF + -DLLAMA_BUILD_SERVER=OFF + -DGGML_BLAS=OFF + -DGGML_METAL=OFF + -DGGML_VULKAN=OFF + -DBUILD_SHARED_LIBS=OFF + -DLLAMA_BUILD_COMMON=OFF + -DLLAMA_TOOLS_INSTALL=OFF + -DCMAKE_BUILD_TYPE=Release + -DGGML_STATIC=ON + -DGGML_OPENMP=OFF + -DCMAKE_INSTALL_PREFIX= +) + +ExternalProject_Get_Property(llama_ext INSTALL_DIR) + +foreach(lib llama ggml ggml-cpu ggml-base) + add_library(${lib} STATIC IMPORTED GLOBAL) + if (MSYS AND NOT "${lib}" STREQUAL "llama") # llama still ends up as libllama.a on Windows, unsure why + set_target_properties(${lib} PROPERTIES + IMPORTED_LOCATION + ${INSTALL_DIR}/lib/${lib}.a # no lib prefix + ) + else() + set_target_properties(${lib} PROPERTIES + IMPORTED_LOCATION + ${INSTALL_DIR}/lib/lib${lib}.a # lib prefix + ) + endif() + add_dependencies(${lib} llama_ext) +endforeach() + +set(LLAMA_LIB_FLAGS llama ggml ggml-cpu ggml-base) + # MACRO DEFINITIONS # add_definitions(-DELPP_THREAD_SAFE) #### END SETUP #### #### SplashKitBackend STATIC LIBRARY #### add_library(SplashKitBackend STATIC ${SOURCE_FILES} ${INCLUDE_FILES}) -target_link_libraries(SplashKitBackend ${LIB_FLAGS}) +target_link_libraries(SplashKitBackend ${LIB_FLAGS} ${LLAMA_LIB_FLAGS}) if(RASPBERRY_PI) if(RASPBERRY_PI_5) @@ -373,4 +424,4 @@ catch_discover_tests(skunit_tests) #### END skunit_tests EXECUTABLE #### install(TARGETS SplashKitBackend DESTINATION lib) -install(FILES ${INCLUDE_FILES} DESTINATION include/SplashKitBackend) \ No newline at end of file +install(FILES ${INCLUDE_FILES} DESTINATION include/SplashKitBackend)
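For reference, a minimal usage sketch of the public API this change adds (generate_reply plus the conversation functions declared in coresdk/src/coresdk/genai.h). This is illustrative only and not part of the diff: it assumes a user program that links against SplashKitBackend, mirroring the pattern in coresdk/src/test/test_genai.cpp; the program's main function is hypothetical.

    // Illustrative sketch of the GenAI API added by this change - not part of the diff.
    #include "genai.h"
    #include "terminal.h"

    using namespace splashkit_lib;

    int main()
    {
        // One-shot generation with the default model (Qwen3 0.6B Instruct).
        // The model file is downloaded to ~/.splashkit/models on first use.
        write_line(generate_reply("Say hello in five words."));

        // Streaming conversation - pieces arrive roughly one word at a time.
        conversation conv = create_conversation(QWEN3_0_6B_INSTRUCT);
        conversation_add_message(conv, "What is SplashKit?");

        while (conversation_is_replying(conv))
        {
            // conversation_is_thinking peeks at the buffered next piece,
            // so check it before consuming the piece.
            bool thinking = conversation_is_thinking(conv);
            string piece = conversation_get_reply_piece(conv);

            if (!thinking)
                write(piece); // hide <think> content, print only the reply
        }
        write_line("");

        free_conversation(conv);
        return 0;
    }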