diff --git a/.gitignore b/.gitignore
index cbf9a5b3..0864e9d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,6 +77,7 @@ Makefile
 cmake_install.cmake
 splashkit_test
 projects/cmake/Resources
+llama_ext-prefix
 .ninja_deps
 .ninja_log
 build.ninja
@@ -101,6 +102,7 @@ out/lib/
 tools/scripts/nuget-pkg/obj
 tools/scripts/test/obj
+
 ### Debian packaging ###
 tools/scripts/debian/libsplashkit-dev*
 tools/scripts/debian/data.tar.xz
diff --git a/coresdk/external b/coresdk/external
index e089bc3c..d9c7ca08 160000
--- a/coresdk/external
+++ b/coresdk/external
@@ -1 +1 @@
-Subproject commit e089bc3ccbd7ff11027a790be44f6ab6038b5c58
+Subproject commit d9c7ca08ca9dbb0051bf57ceadb1d7a2d0f8d536
diff --git a/coresdk/src/backend/backend_types.h b/coresdk/src/backend/backend_types.h
index 03083a86..2eeead1e 100644
--- a/coresdk/src/backend/backend_types.h
+++ b/coresdk/src/backend/backend_types.h
@@ -64,6 +64,7 @@ namespace splashkit_lib
         ADC_PTR= 0x41444350, //'ADCP';
         MOTOR_DRIVER_PTR = 0x4d444950, //'MDIP';
         SERVO_DRIVER_PTR = 0x53455256, //'SERV';
+        CONVERSATION_PTR = 0x434f4e56, //'CONV';
         NONE_PTR = 0x4e4f4e45 //'NONE';
     };
diff --git a/coresdk/src/backend/genai_driver.cpp b/coresdk/src/backend/genai_driver.cpp
new file mode 100644
index 00000000..c6f3a05b
--- /dev/null
+++ b/coresdk/src/backend/genai_driver.cpp
@@ -0,0 +1,287 @@
+//
+// genai_driver.cpp
+// sk
+//
+// Created by Sean Boettger on 19/12/2025.
+//
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include "genai_driver.h"
+#include "core_driver.h"
+#include "utility_functions.h"
+
+namespace splashkit_lib
+{
+    namespace llamacpp
+    {
+
+        static void llama_log_callback_null(ggml_log_level level, const char * text, void * user_data){/* nothing, avoid unnecessary logging*/}
+
+        void init()
+        {
+            static bool initialized = false;
+            if (!initialized)
+            {
+                llama_log_set(llama_log_callback_null, NULL);
+
+                ggml_backend_load_all();
+
+                // Create custom logger with colouring
+                el::Configurations conf;
+                conf.setToDefault();
+                conf.setGlobally(el::ConfigurationType::Format, "%level -> %msg");
+                conf.setGlobally(el::ConfigurationType::Filename, "logs/splashkit.log");
+
+                // `el::Loggers::addFlag(el::LoggingFlag::ColoredTerminalOutput);` would be better but has global effect
+                conf.set(el::Level::Warning, el::ConfigurationType::Format, "\x1b[33m%level -> %msg\x1b[0m");
+                conf.set(el::Level::Error, el::ConfigurationType::Format, "\x1b[31m%level -> %msg\x1b[0m");
+
+                el::Loggers::reconfigureLogger("GenAI", conf);
+
+                initialized = true;
+            }
+        }
+
+        model create_model(std::string path)
+        {
+            ggml_backend_load_all();
+
+            // initialize the model
+            llama_model_params model_params = llama_model_default_params();
+            model_params.n_gpu_layers = 0; // cpu-only
+
+            llama_model * model = llama_model_load_from_file(path.c_str(), model_params);
+
+            if (model == NULL)
+            {
+                CLOG(ERROR, "GenAI") << "Unable to load language model from " << path << " - it may be corrupted or missing.";
+                return {false};
+            }
+
+            if (llama_model_has_encoder(model))
+            {
+                llama_model_free(model);
+                CLOG(ERROR, "GenAI") << "Unsupported model, requires encoder-decoder support.";
+                return {false};
+            }
+
+            const llama_vocab * vocab = llama_model_get_vocab(model);
+            const char* tmpl = llama_model_chat_template(model, /* name */ nullptr);
+
+            return {
+                true,
+                model,
+                vocab,
+                tmpl
+            };
+        }
+
+        void delete_model(model mdl)
+        {
+            if (!mdl.valid)
+                return;
+
+            if (!mdl.model)
+                return;
+
+            llama_model_free(mdl.model);
+        }
+
+        std::string format_chat(model& mdl, const std::vector<message>& messages, bool add_assistant)
+        {
+            std::vector<llama_chat_message> llama_formatted;
+            std::vector<char> formatted(0);
+
+            llama_formatted.reserve(messages.size());
+
+            for (const message& msg : messages)
+            {
+                llama_formatted.push_back({msg.role.c_str(), msg.content.c_str()});
+            }
+
+            int new_len = llama_chat_apply_template(mdl.tmpl, llama_formatted.data(), llama_formatted.size(), add_assistant, formatted.data(), formatted.size());
+            if (new_len > (int)formatted.size())
+            {
+                formatted.resize(new_len);
+                new_len = llama_chat_apply_template(mdl.tmpl, llama_formatted.data(), llama_formatted.size(), add_assistant, formatted.data(), formatted.size());
+            }
+
+            return std::string(formatted.begin(), formatted.end());
+        }
+
+        llama_tokens tokenize_string(model& mdl, const std::string& prompt, bool is_first)
+        {
+            // get token count
+            // note: returns a negative number, the count of tokens it would have returned if the buffer was large enough
+            const int n_prompt = -llama_tokenize(mdl.vocab, prompt.data(), prompt.size(), NULL, 0, is_first, true);
+
+            // create buffer
+            std::vector<llama_token> prompt_tokens(n_prompt);
+
+            // receive the tokens
+            if (llama_tokenize(mdl.vocab, prompt.data(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), is_first, true) < 0)
+            {
+                CLOG(ERROR, "GenAI") << "Failed to tokenize the prompt.";
+                return {};
+            }
+
+            return prompt_tokens;
+        }
+
+        context start_context(model& mdl, llama_tokens& starting_context, inference_settings settings)
+        {
+            // Create the context
+            llama_context_params ctx_params = llama_context_default_params();
+            ctx_params.n_ctx = starting_context.size() + settings.max_length - 1;
+            ctx_params.n_batch = ctx_params.n_ctx;
+            ctx_params.no_perf = true;
+
+            llama_context * ctx = llama_init_from_model(mdl.model, ctx_params);
+
+            if (ctx == NULL)
+            {
+                CLOG(ERROR, "GenAI") << "Failed to create the language model context.";
+                return {nullptr};
+            }
+
+            // Create the sampler
+            auto sparams = llama_sampler_chain_default_params();
+            sparams.no_perf = true;
+            llama_sampler * smpl = llama_sampler_chain_init(sparams);
+
+            // Set up sampler
+            llama_sampler_chain_add(smpl, llama_sampler_init_min_p(settings.min_p, 1));
+            llama_sampler_chain_add(smpl, llama_sampler_init_temp(settings.temperature));
+            llama_sampler_chain_add(smpl, llama_sampler_init_top_k(settings.top_k));
+            llama_sampler_chain_add(smpl, llama_sampler_init_top_p(settings.top_p, 0));
+            if (settings.presence_penalty > 0)
+                llama_sampler_chain_add(smpl, llama_sampler_init_penalties(64, 0, 0, settings.presence_penalty));
+            llama_sampler_chain_add(smpl, llama_sampler_init_dist(settings.seed));
+
+            // Prepare batch for starting context
+            llama_tokens next_batch = starting_context;
+
+            // Cache newline token - we use this manually in some spots
+            llama_token newline_token;
+            llama_tokenize(mdl.vocab, "\n", 1, &newline_token, 1, false, true);
+
+            return
+            {
+                ctx,
+                smpl,
+                next_batch,
+                (int)ctx_params.n_ctx,
+                mdl.vocab,
+                newline_token,
+                0,
+                {},
+                false
+            };
+        }
+
+        int context_step(context& ctx, token_result* token)
+        {
+            const string THINKING_START = "<think>";
+            const string THINKING_END = "</think>";
+
+            if (!ctx.ctx)
+                return -1;
+
+            llama_batch batch = llama_batch_get_one(ctx.next_batch.data(), ctx.next_batch.size());
+            // Decode current batch with the model
+            if (llama_decode(ctx.ctx, batch))
+            {
+                CLOG(ERROR, "GenAI") << "Failed to process response from language model.";
+                if (token)
+                    token->type = token_result::NONE;
+                return -1;
+            }
+
+            ctx.total_context.insert(ctx.total_context.end(), ctx.next_batch.begin(), ctx.next_batch.end());
+            ctx.n_pos += batch.n_tokens;
+
+            //
Sample next token + llama_token new_token_id = llama_sampler_sample(ctx.smpl, ctx.ctx, -1); + + // Has the model finished its response? + if (llama_vocab_is_eog(ctx.vocab, new_token_id)) + { + if (token) + token->type = token_result::NONE; + return 1; + } + + char buf[128]; + int n = llama_token_to_piece(ctx.vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) + { + CLOG(ERROR, "GenAI") << "Failed to convert response token from language model."; + return -1; + } + + std::string s(buf, n); + + if (token) + { + bool is_meta = s == THINKING_START || s == THINKING_END; + token->text = s; + if (is_meta) + token->type = token_result::META; + else if (ctx.in_thinking) + token->type = token_result::THINKING; + else + token->type = token_result::CONTENT; + } + + if (s == THINKING_START) + ctx.in_thinking = true; + else if (s == THINKING_END) + ctx.in_thinking = false; + + // prepare the next batch with the sampled token + ctx.next_batch = {new_token_id}; + + // Have we reached the end of the context? + // If so, stop now. + if (ctx.n_pos + ctx.next_batch.size() >= ctx.ctx_size) + return 1; + + return 0; + } + + void add_to_context(context& ctx, llama_tokens& message) + { + ctx.next_batch.insert(ctx.next_batch.end(), message.begin(), message.end()); + } + + void manual_end_message(context& ctx) + { + ctx.next_batch.push_back(llama_vocab_eot(ctx.vocab)); + ctx.next_batch.push_back(ctx.newline_token); + } + + void delete_context(context& ctx) + { + if (ctx.smpl) + llama_sampler_free(ctx.smpl); + + if (ctx.ctx) + llama_free(ctx.ctx); + } + + void __print_debug_context(context& ctx) + { + for (auto& x : ctx.total_context) + { + char buf[128]; + int n = llama_token_to_piece(ctx.vocab, x, buf, sizeof(buf), 0, true); + + std::string s(buf, n); + std::cout << "|" << s; + } + std::cout << std::endl; + } + } +} diff --git a/coresdk/src/backend/genai_driver.h b/coresdk/src/backend/genai_driver.h new file mode 100644 index 00000000..b24c3e91 --- /dev/null +++ b/coresdk/src/backend/genai_driver.h @@ -0,0 +1,110 @@ +// +// genai_driver.h +// sk +// +// Created by Sean Boettger on 19/12/2025. 
+//
+
+#ifndef genai_driver_h
+#define genai_driver_h
+
+#include "backend_types.h"
+
+#include "llama.h"
+
+namespace splashkit_lib
+{
+    typedef unsigned int uint;
+
+    namespace llamacpp
+    {
+        typedef std::vector<llama_token> llama_tokens;
+
+        struct model
+        {
+            bool valid;
+            llama_model* model;
+            const llama_vocab* vocab;
+            const char* tmpl;
+        };
+
+        struct inference_settings
+        {
+            double temperature = 0.6;
+            double top_p = 0.95;
+            int top_k = 20;
+            double min_p = 0;
+            double presence_penalty = 0;
+            int max_length = 256;
+            uint32_t seed = 42;
+        };
+
+        struct message
+        {
+            std::string role;
+            std::string content;
+        };
+
+        struct context
+        {
+            llama_context* ctx;
+            llama_sampler* smpl;
+            llama_tokens next_batch;
+            int ctx_size = 0;
+
+            const llama_vocab* vocab;
+            llama_token newline_token;
+
+            int n_pos;
+            llama_tokens total_context;
+
+            bool in_thinking = false;
+        };
+
+        struct token_result
+        {
+            enum token_type {
+                NONE,
+                CONTENT,
+                THINKING,
+                META
+            };
+            string text;
+            token_type type;
+        };
+
+        void init();
+
+        model create_model(std::string path);
+        void delete_model(model mdl);
+
+        std::string format_chat(model& mdl, const std::vector<message>& messages, bool add_assistant);
+        llama_tokens tokenize_string(model& mdl, const std::string& prompt, bool is_first);
+
+        context start_context(model& mdl, llama_tokens& starting_context, inference_settings settings);
+        void delete_context(context& ctx);
+
+        int context_step(context& ctx, token_result* token);
+        void add_to_context(context& ctx, llama_tokens& message);
+        void manual_end_message(context& ctx);
+
+        void __print_debug_context(context& ctx);
+    }
+
+    struct sk_conversation
+    {
+        pointer_identifier id;
+
+        llamacpp::model model;
+        llamacpp::context context;
+
+        bool was_generating;
+        bool is_generating;
+
+        string prompt_append;
+
+        llamacpp::token_result next_token;
+    };
+}
+
+#endif /* genai_driver_h */
diff --git a/coresdk/src/backend/utility_functions.cpp b/coresdk/src/backend/utility_functions.cpp
index 0e4cd5cd..6985c168 100644
--- a/coresdk/src/backend/utility_functions.cpp
+++ b/coresdk/src/backend/utility_functions.cpp
@@ -73,6 +73,10 @@ namespace splashkit_lib
     string path_to_user_home()
     {
 #ifndef WINDOWS
+        string home = get_env_var("HOME");
+        if (home != "")
+            return home;
+
         struct passwd *pw = getpwuid(getuid());
         return string(pw->pw_dir);
 #else
diff --git a/coresdk/src/backend/web_driver.cpp b/coresdk/src/backend/web_driver.cpp
index f388e083..2305e91b 100644
--- a/coresdk/src/backend/web_driver.cpp
+++ b/coresdk/src/backend/web_driver.cpp
@@ -206,6 +206,67 @@ namespace splashkit_lib
         return _create_response(curl_handle, res, data_read);
     }
 
+    struct _sk_http_get_file_callback_data
+    {
+        void (*user_callback)(unsigned long, unsigned long);
+        int resuming_from;
+    };
+
+    int _sk_http_get_file_callback(_sk_http_get_file_callback_data* data, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow)
+    {
+        data->user_callback(dltotal == 0 ?
0 : (data->resuming_from + dltotal), data->resuming_from + dlnow); + return 0; + } + + sk_http_response *sk_http_get_file(const string &filename, const string &host, unsigned short port, void (*user_callback)(unsigned long, unsigned long)) + { + const string temp_extension = ".temp"; + string temp_filename = filename+temp_extension; + + FILE *file = fopen(temp_filename.c_str(), "ab+"); + + // find resume point + fseek(file, 0L, SEEK_END); + curl_off_t resume_from = ftell(file); + + // init the curl session + CURL *curl_handle = curl_easy_init(); + CURLcode res; + + _init_curl(curl_handle, host, port); + + curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data); + curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, file); + + _sk_http_get_file_callback_data callback_data; + if (user_callback) + { + curl_easy_setopt(curl_handle, CURLOPT_XFERINFOFUNCTION, _sk_http_get_file_callback); + curl_easy_setopt(curl_handle, CURLOPT_XFERINFODATA, &callback_data); + curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0); + + callback_data.user_callback = user_callback; + callback_data.resuming_from = resume_from; + } + + curl_easy_setopt(curl_handle, CURLOPT_RESUME_FROM_LARGE, resume_from); + + // get it! + res = curl_easy_perform(curl_handle); + + fclose(file); + + // try renaming the temp file if the download was okay - rename returns 0 on success + if (res == CURLE_OK && rename(temp_filename.c_str(), filename.c_str())) + { + LOG(WARNING) << "Failed to rename temporary download file " << temp_filename << " to " << filename; + return nullptr; + } + + request_stream data_read = { nullptr, 0 }; + return _create_response(curl_handle, res, data_read); + } + sk_http_response *sk_http_put(const string &host, unsigned short port, const string &body) { request_stream data_read = { nullptr, 0 }; diff --git a/coresdk/src/backend/web_driver.h b/coresdk/src/backend/web_driver.h index e5e54de2..f5e4810a 100644 --- a/coresdk/src/backend/web_driver.h +++ b/coresdk/src/backend/web_driver.h @@ -17,6 +17,7 @@ namespace splashkit_lib sk_http_response *sk_http_post(const string &host, unsigned short port, const string &body); sk_http_response *sk_http_get(const string &host, unsigned short port); + sk_http_response *sk_http_get_file(const string &filename, const string &host, unsigned short port, void (*user_callback)(unsigned long, unsigned long)); sk_http_response *sk_http_put(const string &host, unsigned short port, const string &body); sk_http_response *sk_http_delete(const string &host, unsigned short port, const string &body); sk_http_response *sk_http_make_request(const sk_http_request &request); diff --git a/coresdk/src/coresdk/genai.cpp b/coresdk/src/coresdk/genai.cpp new file mode 100644 index 00000000..cde93610 --- /dev/null +++ b/coresdk/src/coresdk/genai.cpp @@ -0,0 +1,499 @@ +// +// genai.cpp +// splashkit +// +// Created by Sean Boettger on 20/12/25. 
+//
+
+#include "genai_driver.h"
+#include "genai.h"
+#include "utility_functions.h"
+#include "web_driver.h"
+#include "terminal.h"
+#include "core_driver.h"
+
+#include <filesystem>
+
+using std::to_string;
+
+namespace splashkit_lib
+{
+    static vector<sk_conversation*> objects;
+
+    const language_model DEFAULT_LANGUAGE_MODEL = QWEN3_0_6B_INSTRUCT;
+
+    const int default_max_tokens_base = 256; // base has a higher likelihood of running forever for no reason, better to limit it early
+    const int default_max_tokens_instruct = 4096;
+    const int default_max_tokens_thinking = 4096;
+
+    extern const std::array<language_model_options, 26> models; // defined at end of file
+
+    /* terminal util functions in lieu of ncurses*/
+    void terminal_erase_left(int count /* -1 for all */)
+    {
+        if (count == 0)
+            return;
+
+        if (count == -1)
+            write("\r\033[K");
+        else
+            write("\033["+to_string(count)+"D\033[K");
+    }
+
+    std::vector<int> terminal_stack;
+
+    void terminal_push(const string &str)
+    {
+        write(str);
+        terminal_stack.push_back(str.size());
+    }
+
+    void terminal_pop()
+    {
+        terminal_erase_left(terminal_stack.back());
+        terminal_stack.pop_back();
+    }
+
+    bool download_with_progress_bar(string filename, string url)
+    {
+        auto callback = [](unsigned long expected_size, unsigned long current_size)
+        {
+            terminal_pop();
+
+            if (expected_size == 0)
+            {
+                terminal_push("");
+                return;
+            }
+
+            static int spinner_index = 0;
+
+            const int progress_bar_length = 10;
+            const string spinner = "|/-\\";
+
+            int expected_mb = expected_size / (1024 * 1024);
+            int current_mb = current_size / (1024 * 1024);
+
+            // construct progress bar
+            int progress_bar_filled = 0;
+            if (expected_size > 0)
+                progress_bar_filled = (int)(progress_bar_length * current_size/(double)expected_size);
+            if (progress_bar_filled > progress_bar_length)
+                progress_bar_filled = progress_bar_length;
+
+            string progress_bar = string(progress_bar_filled, '=') + string(progress_bar_length-progress_bar_filled, ' ');
+            if (progress_bar_filled < progress_bar_length)
+                progress_bar[progress_bar_filled] = spinner[(spinner_index++)/2 % spinner.size()];
+
+            // write message
+            terminal_push(progress_bar + "| (" + to_string(current_mb) + "mb / " + to_string(expected_mb) + "mb)");
+        };
+
+        terminal_push("");
+
+        sk_http_response * resp = sk_http_get_file(filename, url, 443, callback);
+
+        terminal_pop();
+
+        return resp != nullptr && resp->code >= 200 && resp->code < 300;
+    }
+
+    bool ensure_exists_or_download(string path, string url, string message)
+    {
+        if (std::filesystem::exists(path))
+            return true;
+
+        terminal_push(message);
+
+        bool result = download_with_progress_bar(path, url);
+
+        terminal_pop();
+
+        return result;
+    }
+
+    llamacpp::model __get_model(language_model_options options)
+    {
+        llamacpp::init();
+
+        if (options.url != "" && !ensure_exists_or_download(options.path, options.url, " ::: Downloading Language Model: " + options.name + " |"))
+        {
+            CLOG(ERROR, "GenAI") << "Failed to download language model - see error above.";
+            return {false};
+        }
+
+        return llamacpp::create_model(options.path);
+    }
+
+    string __generate_common(string prompt, language_model_options options, bool format_chat)
+    {
+        llamacpp::model model = __get_model(options);
+
+        if (!model.valid) return "";
+
+        std::string formatted = prompt;
+
+        if (format_chat)
+        {
+            formatted = llamacpp::format_chat(model, {
+                {
+                    "user", prompt + options.prompt_append
+                },
+            }, true);
+        }
+        llamacpp::llama_tokens tokens = llamacpp::tokenize_string(model, formatted, true);
+
+        llamacpp::context ctx = llamacpp::start_context(model, tokens, {
+
options.temperature, + options.top_p, + options.top_k, + options.min_p, + options.presence_penalty, + options.max_tokens, + (uint32_t)options.seed + }); + + std::string result = ""; + llamacpp::token_result token; + + while (!llamacpp::context_step(ctx, &token)) + { + if (token.type == llamacpp::token_result::CONTENT) + result += token.text; + }; + + llamacpp::delete_context(ctx); + llamacpp::delete_model(model); + + return result; + } + + + string generate_reply(string prompt) + { + return generate_reply(DEFAULT_LANGUAGE_MODEL, prompt); + } + + string generate_reply(language_model model, string prompt) + { + return generate_reply(prompt, option_language_model(model)); + } + + string generate_reply(string prompt, language_model_options options) + { + return __generate_common(prompt, options, true); + } + + string generate_text(string text) + { + return generate_text(DEFAULT_LANGUAGE_MODEL, text); + } + + string generate_text(language_model model, string text) + { + return generate_text(text, option_language_model(model)); + } + + string generate_text(string text, language_model_options options) + { + return __generate_common(text, options, false); + } + + // -------------------------------------------------------------- + + // Streaming conversation + + #define CONVERSATION_CHECK(x, val) \ + if (INVALID_PTR(c, CONVERSATION_PTR))\ + {\ + LOG(WARNING) << "Passed an invalid conversation object to " x;\ + return val;\ + } + + conversation create_conversation() + { + return create_conversation(option_language_model(DEFAULT_LANGUAGE_MODEL)); + } + + conversation create_conversation(language_model model) + { + return create_conversation(option_language_model(model)); + } + + conversation create_conversation(language_model_options options) + { + internal_sk_init(); + + llamacpp::model model = __get_model(options); + + if (!model.valid) return nullptr; + + llamacpp::llama_tokens initial_tokens = llamacpp::tokenize_string(model, "", true); + + sk_conversation* c = new sk_conversation(); + c->id = CONVERSATION_PTR; + c->model = model; + c->context = llamacpp::start_context(model, initial_tokens, { + options.temperature, + options.top_p, + options.top_k, + options.min_p, + options.presence_penalty, + options.max_tokens, + (uint32_t)options.seed + });; + + c->was_generating = false; + c->is_generating = true; + + c->prompt_append = options.prompt_append; + + objects.push_back(c); + + return c; + }; + + void conversation_add_message(conversation c, const string& message) + { + CONVERSATION_CHECK("conversation_add_message", ) + + // end the language model's turn + if (c->was_generating) + { + c->was_generating = false; + llamacpp::manual_end_message(c->context); + } + + // tokenize user's prompt and add to context + llamacpp::llama_tokens tokens = llamacpp::tokenize_string(c->model, llamacpp::format_chat(c->model, { + {"user", message + c->prompt_append} + }, true), false); + llamacpp::add_to_context(c->context, tokens); + + // the model is ready to generate again + c->is_generating = true; + } + + void __buffer_next_token(conversation c) + { + if (c->next_token.type != llamacpp::token_result::token_type::NONE) + return; // already buffered + + // attempt to get next token that is non-meta + do + { + // if we reach the end of the message, return even if a meta token (shouldn't happen though) + if (llamacpp::context_step(c->context, &c->next_token)) + { + c->is_generating = false; + return; + } + } while (c->next_token.type == llamacpp::token_result::token_type::META); + } + + // These next three 
functions buffer the next token so that they can
+    // return information about it
+    bool conversation_is_replying(conversation c)
+    {
+        CONVERSATION_CHECK("conversation_is_replying", false)
+
+        __buffer_next_token(c);
+
+        return c->is_generating;
+    }
+
+    bool conversation_is_thinking(conversation c)
+    {
+        CONVERSATION_CHECK("conversation_is_thinking", false)
+
+        __buffer_next_token(c);
+
+        return c->next_token.type == llamacpp::token_result::token_type::THINKING;
+    }
+
+    string conversation_get_reply_piece(conversation c)
+    {
+        CONVERSATION_CHECK("conversation_get_reply_piece", "")
+
+        // if the user wants a token, we can resume generating even if we already finished
+        c->is_generating = true;
+        c->was_generating = true;
+
+        __buffer_next_token(c);
+
+        // token is consumed
+        c->next_token.type = llamacpp::token_result::token_type::NONE;
+
+        return c->next_token.text;
+    }
+
+    void __free_conversation_resource(conversation c)
+    {
+        llamacpp::delete_context(c->context);
+        llamacpp::delete_model(c->model);
+    }
+
+    void free_conversation(conversation c)
+    {
+        CONVERSATION_CHECK("free_conversation", )
+
+        __free_conversation_resource(c);
+
+        for (auto it = objects.begin(); it != objects.end(); it++)
+        {
+            if (*it == c)
+            {
+                notify_of_free(c);
+
+                delete *it;
+
+                it = objects.erase(it);
+                return;
+            }
+        }
+    }
+
+    void free_all_conversations()
+    {
+        for (conversation c : objects)
+        {
+            __free_conversation_resource(c);
+        }
+
+        objects.clear();
+    }
+
+    // --------------------------------------------------------------
+
+    language_model_options option_language_model(language_model model)
+    {
+        if (model < 0 || model >= models.size() || models[model].name == "")
+        {
+            model = DEFAULT_LANGUAGE_MODEL;
+            CLOG(WARNING, "GenAI") << "Invalid model selected, defaulting to '" << models[model].name << "'";
+        }
+
+        string home_path = path_from( {path_to_user_home(), ".splashkit", "models"} );
+
+        language_model_options options = models[model];
+        options.path = home_path + options.path;
+        options.seed = 0;
+
+        return options;
+    }
+
+    // --------------------------------------------------------------
+
+
+    // default model definitions
+
+    const std::array<language_model_options, 26> models = {{
+        [0]={}, [1]={}, [2]={}, [3]={},
+
+        [QWEN3_0_6B_BASE] = {
+            "Qwen3 0.6B Base",
+            "https://huggingface.co/mradermacher/Qwen3-0.6B-Base-GGUF/resolve/main/Qwen3-0.6B-Base.Q8_0.gguf?download=true",
+            "Qwen3-0.6B-Base.Q8_0.gguf",
+            default_max_tokens_base, 0.7, 0.8, 20, 0, 1.5
+        },
+        [QWEN3_0_6B_INSTRUCT] = {
+            "Qwen3 0.6B Instruct",
+            "https://huggingface.co/Qwen/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf?download=true",
+            "Qwen3-0.6B-Q8_0.gguf",
+            default_max_tokens_instruct, 0.7, 0.8, 20, 0, 1.5, " /no_think"
+        },
+        [QWEN3_0_6B_THINKING] = {
+            "Qwen3 0.6B Thinking",
+            "https://huggingface.co/Qwen/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf?download=true",
+            "Qwen3-0.6B-Q8_0.gguf",
+            default_max_tokens_thinking, 0.6, 0.95, 20, 0, 1.5
+        },
+
+        [7]={},
+
+        [QWEN3_1_7B_BASE] = {
+            "Qwen3 1.7B Base",
+            "https://huggingface.co/mradermacher/Qwen3-1.7B-Base-GGUF/resolve/main/Qwen3-1.7B-Base.Q8_0.gguf?download=true",
+            "Qwen3-1.7B-Base.Q8_0.gguf",
+            default_max_tokens_base, 0.7, 0.8, 20, 0, 1.5
+        },
+        [QWEN3_1_7B_INSTRUCT] = {
+            "Qwen3 1.7B Instruct",
+            "https://huggingface.co/Qwen/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q8_0.gguf?download=true",
+            "Qwen3-1.7B-Q8_0.gguf",
+            default_max_tokens_instruct, 0.7, 0.8, 20, 0, 1.5, " /no_think"
+        },
+        [QWEN3_1_7B_THINKING] = {
+            "Qwen3 1.7B Thinking",
+
"https://huggingface.co/Qwen/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q8_0.gguf?download=true", + "Qwen3-1.7B-Q8_0.gguf", + default_max_tokens_thinking, 0.6, 0.95, 20, 0, 1.5 + }, + + [11]={}, + + [QWEN3_4B_BASE] = { + "Qwen3 4B Base", + "https://huggingface.co/mradermacher/Qwen3-4B-Base-GGUF/resolve/main/Qwen3-4B-Base.Q2_K.gguf?download=true", + "Qwen3-4B-Base.Q2_K.gguf", + default_max_tokens_base, 0.7, 0.8, 20, 0, 0 + }, + [QWEN3_4B_INSTRUCT] = { + "Qwen3 4B Instruct", + "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/resolve/main/Qwen3-4B-Instruct-2507-UD-Q2_K_XL.gguf?download=true", + "Qwen3-4B-Instruct-2507-UD-Q2_K_XL.gguf", + default_max_tokens_instruct, 0.7, 0.8, 20, 0, 0 + }, + [QWEN3_4B_THINKING] = { + "Qwen3 4B Thinking", + "https://huggingface.co/unsloth/Qwen3-4B-Thinking-2507-GGUF/resolve/main/Qwen3-4B-Thinking-2507-UD-Q2_K_XL.gguf?download=true", + "Qwen3-4B-Thinking-2507-UD-Q2_K_XL.gguf", + default_max_tokens_thinking, 0.6, 0.95, 20, 0, 0 + }, + + [15]={}, + + [GEMMA3_270M_BASE] = { + "Gemma3 270M Base", + "https://huggingface.co/ggml-org/gemma-3-270m-GGUF/resolve/main/gemma-3-270m-Q8_0.gguf?download=true", + "gemma-3-270m-Q8_0.gguf", + default_max_tokens_base, 1.0, 0.95, 64, 0, 0 + }, + [GEMMA3_270M_INSTRUCT] = { + "Gemma3 270M Instruct", + "https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-Q8_0.gguf?download=true", + "gemma-3-270m-it-Q8_0.gguf", + default_max_tokens_instruct, 1.0, 0.95, 64, 0, 0 + }, + + [18]={}, [19]={}, + + [GEMMA3_1B_BASE] = { + "Gemma3 1B Base", + "https://huggingface.co/mradermacher/gemma-3-1b-pt-GGUF/resolve/main/gemma-3-1b-pt.Q8_0.gguf?download=true", + "gemma-3-1b-pt.Q8_0.gguf", + default_max_tokens_base, 1.0, 0.95, 64, 0, 0 + }, + [GEMMA3_1B_INSTRUCT] = { + "Gemma3 1B Instruct", + "https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q8_0.gguf?download=true", + "gemma-3-1b-it-Q8_0.gguf", + default_max_tokens_instruct, 1.0, 0.95, 64, 0, 0 + }, + + [22]={}, [23]={}, + + [GEMMA3_4B_BASE] = { + "Gemma3 4B Base", + "https://huggingface.co/mradermacher/gemma-3-4b-pt-GGUF/resolve/main/gemma-3-4b-pt.Q2_K.gguf?download=true", + "gemma-3-4b-pt.Q2_K.gguf", + default_max_tokens_base, 1.0, 0.95, 64, 0, 0 + }, + [GEMMA3_4B_INSTRUCT] = { + "Gemma3 4B Instruct", + "https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-UD-IQ3_XXS.gguf?download=true", + "gemma-3-4b-it-UD-IQ3_XXS.gguf", + default_max_tokens_instruct, 1.0, 0.95, 64, 0, 0 + } + }}; +} diff --git a/coresdk/src/coresdk/genai.h b/coresdk/src/coresdk/genai.h new file mode 100644 index 00000000..c57f1fa8 --- /dev/null +++ b/coresdk/src/coresdk/genai.h @@ -0,0 +1,258 @@ +/** + * @header genai + * @author Sean Boettger + * @brief SplashKit gives you a simple way to use and embed local AIs in your projects, + * that run on your own computer. + * + * @attribute group generative_ai + * @attribute static generative_ai + */ + +#ifndef genai_hpp +#define genai_hpp + +#include "types.h" + +#include +#include + +using std::string; + +namespace splashkit_lib +{ + /** + * The `conversation` type is used to refer to conversations between the user + * and a language model. You can use it to send messages to the language model, + * and stream responses back. 
+ * + * + * All `conversation` objects are: + * + * + * - created with `create_conversation()`, `create_conversation(language_model model)` or + * `create_conversation(language_model_options options)` + * + * + * - and must be released using `free_conversation()` (to release a specific `conversation` object) + * or `free_all_conversation()` (to release all created `conversation` objects). + * + * + * @attribute class conversation + */ + typedef struct sk_conversation *conversation; + + /** + * @brief Generates a reply to a textual prompt by a language model + * + * The language model will respond to the textual prompt in a chat style format. It will follow instructions and answer questions. + * Instruct or Thinking models are recommended. Base models likely won't output sensible results. + * + * @param prompt The prompt for the language model to reply to. + * + * @returns The generated reply. + */ + string generate_reply(string prompt); + + /** + * @brief Generates a reply to a textual prompt by a language model + * + * The language model will respond to the textual prompt in a chat style format. It will follow instructions and answer questions. + * Instruct or Thinking models are recommended. Base models likely won't output sensible results. + * + * @param model The language model to use + * @param prompt The prompt for the language model to reply to. + * + * @returns The generated reply. + * + * @attribute suffix with_model + */ + string generate_reply(language_model model, string prompt); + + /** + * @brief Generates a reply to a textual prompt by a language model + * + * The language model will respond to the textual prompt in a chat style format. It will follow instructions and answer questions. + * Instruct or Thinking models are recommended. Base models likely won't output sensible results. + * + * @param prompt The prompt for the language model to reply to. + * @param options The generation options - use the `option_` functions to create this, for instance `option_language_model` + * + * @returns The generated reply. + * + * @attribute suffix with_options + */ + string generate_reply(string prompt, language_model_options options); + + + /** + * @brief Generates text that continues from a prompt + * + * The language model will continue predicting text based on patterns in the prompt - it will not directly follow instructions or answer questions. + * Base models are recommended; Instruct and Thinking models may work. + * + * @param text The input text for the language model to continue. + * + * @returns The generated reply. + */ + string generate_text(string text); + + /** + * @brief Generates text that continues from a prompt + * + * The language model will continue predicting text based on patterns in the prompt - it will not directly follow instructions or answer questions. + * Base models are recommended; Instruct and Thinking models may work. + * + * @param model The language model to use + * @param text The input text for the language model to continue. + * + * @returns The generated reply. + * + * @attribute suffix with_model + */ + string generate_text(language_model model, string text); + + /** + * @brief Generates text that continues from a prompt + * + * The language model will continue predicting text based on patterns in the prompt - it will not directly follow instructions or answer questions. + * Base models are recommended; Instruct and Thinking models may work. + * + * @param text The input text for the language model to continue. 
+ * @param options The generation options - use the `option_` functions to create this, for instance `option_language_model` + * + * @returns The generated reply. + * + * @attribute suffix with_options + */ + string generate_text(string text, language_model_options options); + + /** + * @brief Creates a new `conversation` object, that uses the default language model. + * + * The `conversation` object can have messages added to it, and responses streamed back from it via the other Conversation functions and procedures + * + * @returns Returns a new `conversation` object. + * + * @attribute class conversation + * @attribute constructor true + */ + conversation create_conversation(); + + /** + * @brief Creates a new `conversation` object, that uses a chosen language model. + * + * The `conversation` object can have messages added to it, and responses streamed back from it via the other Conversation functions and procedures + * + * @param model The language model to use + * + * @returns Returns a new `conversation` object. + * + * @attribute class conversation + * @attribute constructor true + * + * @attribute suffix with_model + */ + conversation create_conversation(language_model model); + + /** + * @brief Creates a new `conversation` object, that uses a chosen language model among other options. + * + * The `conversation` object can have messages added to it, and responses streamed back from it via the other Conversation functions and procedures + * + * @param options The options to use - use this to choose the language model, and change various parameters. + * + * @returns Returns a new `conversation` object. + * + * @attribute class conversation + * @attribute constructor true + * + * @attribute suffix with_options + */ + conversation create_conversation(language_model_options options); + + /** + * Checks if a language model is currently generating a reply within a `conversation`. + * If so, you can continue to receive the message with `conversation_get_reply_piece(conversation c)` + * + * @param c The `conversation` object to check + * + * @returns Returns whether the language model is still generating a reply + * + * @attribute class conversation + * @attribute method is_replying + * @attribute self c + */ + bool conversation_is_replying(conversation c); + + /** + * Checks if a language model is currently "thinking" while generating a reply within a `conversation`. + * You can use this to filter out the "thoughts" and display them differently (or hide them entirely) + * + * @param c The `conversation` object to check + * + * @returns Returns whether the language model is currently thinking while generating a reply + * + * @attribute class conversation + * @attribute method is_thinking + * @attribute self c + */ + bool conversation_is_thinking(conversation c); + + /** + * Adds a message to a `conversation`, that the language model will begin replying to. 
+     * You can receive the reply one piece at a time by calling `conversation_get_reply_piece(conversation c)` in a loop
+     *
+     * @param c The `conversation` object to check
+     * @param message The user message to add to the conversation - the language model will reply to this
+     *
+     * @attribute class conversation
+     * @attribute method add_message
+     * @attribute self c
+     */
+    void conversation_add_message(conversation c, const string& message);
+
+    /**
+     * Returns a single piece of a reply (generally one word at a time) from the `conversation`
+     * You can use a loop while checking `conversation_is_replying` to retrieve the reply as it generates
+     *
+     * @param c The `conversation` object to receive the reply from
+     *
+     * @returns Returns a small piece of the reply (generally 1 word or less)
+     *
+     * @attribute class conversation
+     * @attribute method get_reply_piece
+     * @attribute self c
+     */
+    string conversation_get_reply_piece(conversation c);
+
+    /**
+     * Frees the resources associated with the `conversation` object.
+     *
+     * @param c The `conversation` object whose resources should be released.
+     *
+     * @attribute class conversation
+     * @attribute destructor true
+     * @attribute self c
+     * @attribute method free
+     */
+    void free_conversation(conversation c);
+
+    /**
+     * Releases all of the `conversation` objects which have been loaded.
+     *
+     * @attribute static conversations
+     * @attribute method free_all
+     */
+    void free_all_conversations();
+
+    /**
+     * Use this option to choose which language model to use, and initialize its default settings
+     *
+     * @param model The language model to use
+     *
+     * @return Language model options that will use that model and its default settings.
+     */
+    language_model_options option_language_model(language_model model);
+
+}
+#endif /* genai_hpp */
diff --git a/coresdk/src/coresdk/types.h b/coresdk/src/coresdk/types.h
index 3573af9f..c46ce9f6 100644
--- a/coresdk/src/coresdk/types.h
+++ b/coresdk/src/coresdk/types.h
@@ -548,5 +548,75 @@ namespace splashkit_lib
         BUBBLE = 4,
         BUBBLE_MULTICOLORED = 5
     };
+
+    /**
+     * Language Models:
+     * Choose between different language models to trade off speed and intelligence
+     * Each model is scaled to fit within 1~2GB and will be automatically downloaded when needed - feel free to try them out!
+     *
+     * @constant QWEN3_0_6B_BASE Qwen3 0.6B Base model - small, extremely fast and good for text completion. Very limited world knowledge.
+     * @constant QWEN3_0_6B_INSTRUCT Qwen3 0.6B Instruct model (default) - small, extremely fast and can follow simple instructions. Very limited world knowledge.
+     * @constant QWEN3_0_6B_THINKING Qwen3 0.6B Thinking model - small, extremely fast and can follow more specific instructions, but has a short delay before starting to reply. Very limited world knowledge.
+     * @constant QWEN3_1_7B_BASE Qwen3 1.7B Base model - decently fast and good for text completion. Limited world knowledge.
+     * @constant QWEN3_1_7B_INSTRUCT Qwen3 1.7B Instruct model - decently fast and can follow instructions. Limited world knowledge.
+     * @constant QWEN3_1_7B_THINKING Qwen3 1.7B Thinking model - decently fast and can follow more difficult instructions, but has a delay before starting to reply. Limited world knowledge.
+     * @constant QWEN3_4B_BASE Qwen3 4B Base model - slower but excellent for text completion/pattern based completion
+     * @constant QWEN3_4B_INSTRUCT Qwen3 4B Instruct model - slower but can follow complex instructions
+     * @constant QWEN3_4B_THINKING Qwen3 4B Thinking model - slower but can follow complex and specific instructions, but has a potentially long delay before starting to reply
+     * @constant GEMMA3_270M_BASE Gemma3 270M Base model - tiny, extremely fast, and good for text completion. Very limited world knowledge.
+     * @constant GEMMA3_270M_INSTRUCT Gemma3 270M Instruct model - tiny, extremely fast, and good for very simple instructions. Very limited world knowledge.
+     * @constant GEMMA3_1B_BASE Gemma3 1B Base model - fast and good for text completion. Has decent world knowledge and multi-lingual abilities.
+     * @constant GEMMA3_1B_INSTRUCT Gemma3 1B Instruct model - fast and can follow instructions. Has decent world knowledge and multi-lingual abilities.
+     * @constant GEMMA3_4B_BASE Gemma3 4B Base model - slower but good for text completion/pattern based completion. Has decent world knowledge and multi-lingual abilities.
+     * @constant GEMMA3_4B_INSTRUCT Gemma3 4B Instruct model - slower but can follow complex instructions. Has decent world knowledge and multi-lingual abilities.
+     */
+    enum language_model
+    {
+        QWEN3_0_6B_BASE = 4,
+        QWEN3_0_6B_INSTRUCT = 5,
+        QWEN3_0_6B_THINKING = 6,
+        QWEN3_1_7B_BASE = 8,
+        QWEN3_1_7B_INSTRUCT = 9,
+        QWEN3_1_7B_THINKING = 10,
+        QWEN3_4B_BASE = 12,
+        QWEN3_4B_INSTRUCT = 13,
+        QWEN3_4B_THINKING = 14,
+        GEMMA3_270M_BASE = 16,
+        GEMMA3_270M_INSTRUCT = 17,
+        GEMMA3_1B_BASE = 20,
+        GEMMA3_1B_INSTRUCT = 21,
+        GEMMA3_4B_BASE = 24,
+        GEMMA3_4B_INSTRUCT = 25,
+    };
+
+    /**
+     * Language model options allow you to customize the language model used. These should be
+     * initialised using functions such as `option_language_model`.
+     *
+     * @field name The name of the model (used in diagnostic messages).
+     * @field url A URL to download a model from.
+     * @field path A path to a custom language model (.gguf) file on your computer/a place to download it to.
+     * @field max_tokens The maximum number of tokens to output when replying. One word is approximately two tokens.
+     * @field temperature Increases the likelihood of unlikely tokens to be chosen.
+     * @field top_p Only choose from the top P most likely tokens.
+     * @field top_k Only choose from the top K most likely tokens.
+     * @field min_p Remove tokens less likely than P.
+     * @field presence_penalty Penalizes words that have been used once, making them less likely. Can reduce repetition.
+     * @field prompt_append A string to append to prompts automatically.
+     * @field seed The seed used when sampling tokens.
+     */
+    struct language_model_options
+    {
+        string name;
+        string url;
+        string path;
+        int max_tokens;
+        double temperature;
+        double top_p;
+        int top_k;
+        double min_p;
+        double presence_penalty;
+        string prompt_append;
+        int seed;
+    };
 }
 #endif /* types_hpp */
diff --git a/coresdk/src/test/test_genai.cpp b/coresdk/src/test/test_genai.cpp
new file mode 100644
index 00000000..669bea23
--- /dev/null
+++ b/coresdk/src/test/test_genai.cpp
@@ -0,0 +1,73 @@
+//
+// test_genai.cpp
+// splashkit
+//
+// Created by Sean Boettger on 20/12/2025.
+// + +#include "genai.h" +#include "terminal.h" +#include "basics.h" +#include "utils.h" +#include +#include + +using namespace std; +using namespace splashkit_lib; + +void run_genai_test() +{ + const string THINKING_STYLE = "\033[37;3m"; + const string RESET_STYLE = "\033[0m"; + + conversation conv = create_conversation(QWEN3_1_7B_THINKING); + + while(true) + { + write("\n> "); + string prompt = read_line(); + + // See if the user wants to exit + string exit = trim(generate_reply(QWEN3_1_7B_INSTRUCT, "User A: "+prompt+"\nDoes user A want to end the conversation? Answer with one word, either CONTINUE or END:")); + + write_line("["+exit+"]"); + + if (exit == "END") + break; + + // otherwise continue the conversation + conversation_add_message(conv, prompt); + + bool thinking = false; + string last_piece = "\n"; + while(conversation_is_replying(conv)) + { + if (conversation_is_thinking(conv) != thinking) + { + thinking = conversation_is_thinking(conv); + + if (thinking) + write(THINKING_STYLE); + else + write(RESET_STYLE); + } + + string piece = conversation_get_reply_piece(conv); + + // avoid double newlines - ideally this will be filtered on SplashKit's side instead + if (piece == "\n" && last_piece == "\n") + continue; + + if (piece == "\n\n") + piece = "\n"; + + write(piece); + last_piece = piece; + } + + if (last_piece != "\n") + write("\n"); + } + + free_conversation(conv); +} diff --git a/coresdk/src/test/test_main.cpp b/coresdk/src/test/test_main.cpp index 0b6e8ab9..e4b7750a 100644 --- a/coresdk/src/test/test_main.cpp +++ b/coresdk/src/test/test_main.cpp @@ -68,6 +68,7 @@ void setup_tests() add_test("GPIO - SPI MAX7219 LED matrix Tests", run_gpio_spi_led_matrix_tests); add_test("GPIO - I2C HT16K33 LED matrix Tests", run_gpio_i2c_led_matrix_tests); add_test("GPIO - I2C HT16K33 LED 14 Segment Tests", run_gpio_i2c_quad_14_seg_test); + add_test("Gen AI", run_genai_test); } int main(int argv, char **args) diff --git a/coresdk/src/test/test_main.h b/coresdk/src/test/test_main.h index 1beddfc8..89f42267 100644 --- a/coresdk/src/test/test_main.h +++ b/coresdk/src/test/test_main.h @@ -44,5 +44,6 @@ void run_gpio_i2c_quad_14_seg_test(); void run_terminal_test(); void run_logging_test(); void run_ui_test(); +void run_genai_test(); #endif /* test_main_h */ diff --git a/projects/cmake/CMakeLists.txt b/projects/cmake/CMakeLists.txt index 0780489a..017e2a27 100644 --- a/projects/cmake/CMakeLists.txt +++ b/projects/cmake/CMakeLists.txt @@ -5,6 +5,7 @@ set(CMAKE_BUILD_TYPE Debug) cmake_policy(SET CMP0083 NEW) include(CheckPIESupported) +include(ExternalProject) check_pie_supported() # SK Directories relative to cmake project @@ -44,6 +45,7 @@ if (APPLE) -framework AudioToolbox \ -framework CoreAudio \ -framework CoreVideo \ + -framework Accelerate \ -lSDL2 \ -lSDL2_mixer \ -lSDL2_ttf \ @@ -245,6 +247,8 @@ include_directories("${SK_EXT}/hash-library") include_directories("${SK_EXT}/json") include_directories("${SK_EXT}/catch") include_directories("${SK_EXT}/microui/src") +include_directories("${SK_EXT}/llama.cpp/include") +include_directories("${SK_EXT}/llama.cpp/ggml/include") # MAC OS DIRECTORY INCLUDES if (APPLE) @@ -257,13 +261,60 @@ if (APPLE) include_directories("${SK_EXT}/SDL_image/external/libpng-1.6.2") endif() +# INCLUDE LLAMA.CPP + +# Included as an external project so that it can be configured +# as Release, independently of the main project. 
+ +# Compiled as CPU only +# TODO: Decide on minimum architecture requirements +ExternalProject_Add( + llama_ext + SOURCE_DIR "${SK_EXT}/llama.cpp" + CMAKE_ARGS + -DLLAMA_BUILD_TESTS=OFF + -DLLAMA_BUILD_TOOLS=OFF + -DLLAMA_BUILD_EXAMPLES=OFF + -DLLAMA_BUILD_SERVER=OFF + -DGGML_BLAS=OFF + -DGGML_METAL=OFF + -DGGML_VULKAN=OFF + -DBUILD_SHARED_LIBS=OFF + -DLLAMA_BUILD_COMMON=OFF + -DLLAMA_TOOLS_INSTALL=OFF + -DCMAKE_BUILD_TYPE=Release + -DGGML_STATIC=ON + -DGGML_OPENMP=OFF + -DCMAKE_INSTALL_PREFIX= +) + +ExternalProject_Get_Property(llama_ext INSTALL_DIR) + +foreach(lib llama ggml ggml-cpu ggml-base) + add_library(${lib} STATIC IMPORTED GLOBAL) + if (MSYS AND NOT "${lib}" STREQUAL "llama") # llama still ends up as libllama.a on Windows, unsure why + set_target_properties(${lib} PROPERTIES + IMPORTED_LOCATION + ${INSTALL_DIR}/lib/${lib}.a # no lib prefix + ) + else() + set_target_properties(${lib} PROPERTIES + IMPORTED_LOCATION + ${INSTALL_DIR}/lib/lib${lib}.a # lib prefix + ) + endif() + add_dependencies(${lib} llama_ext) +endforeach() + +set(LLAMA_LIB_FLAGS llama ggml ggml-cpu ggml-base) + # MACRO DEFINITIONS # add_definitions(-DELPP_THREAD_SAFE) #### END SETUP #### #### SplashKitBackend STATIC LIBRARY #### add_library(SplashKitBackend STATIC ${SOURCE_FILES} ${INCLUDE_FILES}) -target_link_libraries(SplashKitBackend ${LIB_FLAGS}) +target_link_libraries(SplashKitBackend ${LIB_FLAGS} ${LLAMA_LIB_FLAGS}) if(RASPBERRY_PI) if(RASPBERRY_PI_5) @@ -373,4 +424,4 @@ catch_discover_tests(skunit_tests) #### END skunit_tests EXECUTABLE #### install(TARGETS SplashKitBackend DESTINATION lib) -install(FILES ${INCLUDE_FILES} DESTINATION include/SplashKitBackend) \ No newline at end of file +install(FILES ${INCLUDE_FILES} DESTINATION include/SplashKitBackend)
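For reference, a minimal usage sketch of the public API this change adds (generate_reply plus the conversation functions declared in coresdk/src/coresdk/genai.h). This is illustrative only and not part of the diff: it assumes a user program that links against SplashKitBackend, mirroring the pattern in coresdk/src/test/test_genai.cpp; the program's main function is hypothetical.

    // Illustrative sketch of the GenAI API added by this change - not part of the diff.
    #include "genai.h"
    #include "terminal.h"

    using namespace splashkit_lib;

    int main()
    {
        // One-shot generation with the default model (Qwen3 0.6B Instruct).
        // The model file is downloaded to ~/.splashkit/models on first use.
        write_line(generate_reply("Say hello in five words."));

        // Streaming conversation - pieces arrive roughly one word at a time.
        conversation conv = create_conversation(QWEN3_0_6B_INSTRUCT);
        conversation_add_message(conv, "What is SplashKit?");

        while (conversation_is_replying(conv))
        {
            // conversation_is_thinking peeks at the buffered next piece,
            // so check it before consuming the piece.
            bool thinking = conversation_is_thinking(conv);
            string piece = conversation_get_reply_piece(conv);

            if (!thinking)
                write(piece); // hide <think> content, print only the reply
        }
        write_line("");

        free_conversation(conv);
        return 0;
    }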