From 75f10ec8c030c53340f130192f48fdfc73da3d3b Mon Sep 17 00:00:00 2001 From: Kristian Polso Date: Sun, 5 Nov 2023 13:26:06 +0200 Subject: [PATCH] ElasticSearch backend --- .idea/php-rag.iml | 7 + .idea/php.xml | 6 + README.md | 15 +- composer.json | 10 +- composer.lock | 343 ++++++++++++++++++++++++++++++++- env.example | 4 + examples/chicago-prompt.php | 15 +- src/Backend/ElasticBackend.php | 96 +++++++++ 8 files changed, 480 insertions(+), 16 deletions(-) create mode 100644 src/Backend/ElasticBackend.php diff --git a/.idea/php-rag.iml b/.idea/php-rag.iml index 95f0ed0..a4d38a4 100644 --- a/.idea/php-rag.iml +++ b/.idea/php-rag.iml @@ -28,6 +28,13 @@ + + + + + + + diff --git a/.idea/php.xml b/.idea/php.xml index be39697..a242c2c 100644 --- a/.idea/php.xml +++ b/.idea/php.xml @@ -26,6 +26,12 @@ + + + + + + diff --git a/README.md b/README.md index 33999cb..7e49fec 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,10 @@ If you are not using DDEV, you will need: - SOLR_HOST - Hostname of Solr server - SOLR_PORT - Port of Solr server -- SOLR_CORE - Name of the core of the Solr server +- SOLR_CORE - Name of the core of the Solr server +- ELASTIC_HOST - Hostname and port of Elastic server +- ELASTIC_API_KEY - API key of Elastic server +- ELASTIC_INDEX - Name of the index of the Elastic server - CONTEXT_TOKEN_COUNT - Maximum token count to be included in LLM context - REPLICATE_API_KEY - Replicate API key (if using Replicate LLM) - REPLICATE_MODEL_VERSION - Hash of the model version used in Replicate (if using Replicate LLM) @@ -35,11 +38,17 @@ If you are not using DDEV, you will need: ### Backend (database) -Currently only supports Solr server. But more backends (like SQL or Elasticsearch) can be done easily, just implement the `Krisseck\PhpRag\Backend\BackendInterface`. +- Solr +- Elasticsearch + +More backends (like MySQL or SQLite) can be added easily: just implement the `Krisseck\PhpRag\Backend\BackendInterface`. 
### LLM -Supports both Replicate and KoboldAI LLM services. Replicate is easier to use, you can get a free API key on https://replicate.com/. +- Replicate +- KoboldAI Horde + +Replicate is easier to use, you can get a free API key on https://replicate.com/. For Replicate, you need to provide the hash of the model version you will be using. You can get the hash from "Versions" tab on a model's page. diff --git a/composer.json b/composer.json index 757aa5b..31283fb 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,8 @@ "guzzlehttp/guzzle": "^7.8", "vlucas/phpdotenv": "^5.5", "symfony/event-dispatcher": "^6.3", - "yethee/tiktoken": "^0.2.0" + "yethee/tiktoken": "^0.2.0", + "elasticsearch/elasticsearch": "^8.10" }, "autoload": { "psr-4": { @@ -17,5 +18,10 @@ "name": "Kristian Polso", "email": "kristian@polso.info" } - ] + ], + "config": { + "allow-plugins": { + "php-http/discovery": true + } + } } diff --git a/composer.lock b/composer.lock index d525593..118faf8 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,112 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "cb41b61274acc6b10f5983202375e02b", + "content-hash": "54b5eb62ba1702faeb53a80049512e8c", "packages": [ + { + "name": "elastic/transport", + "version": "v8.7.0", + "source": { + "type": "git", + "url": "git@github.com:elastic/elastic-transport-php.git", + "reference": "4d7937f026393186f48b2e4fba6d8db85ca0dba6" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/elastic/elastic-transport-php/zipball/4d7937f026393186f48b2e4fba6d8db85ca0dba6", + "reference": "4d7937f026393186f48b2e4fba6d8db85ca0dba6", + "shasum": "" + }, + "require": { + "composer-runtime-api": "^2.0", + "php": "^7.4 || ^8.0", + "php-http/discovery": "^1.14", + "php-http/httplug": "^2.3", + "psr/http-client": "^1.0", + "psr/http-factory": "^1.0", + "psr/http-message": "^1.0 || ^2.0", + "psr/log": "^1 
|| ^2 || ^3" + }, + "require-dev": { + "nyholm/psr7": "^1.5", + "php-http/mock-client": "^1.5", + "phpstan/phpstan": "^1.4", + "phpunit/phpunit": "^9.5" + }, + "type": "library", + "autoload": { + "psr-4": { + "Elastic\\Transport\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "HTTP transport PHP library for Elastic products", + "keywords": [ + "PSR_17", + "elastic", + "http", + "psr-18", + "psr-7", + "transport" + ], + "time": "2023-05-23T08:44:23+00:00" + }, + { + "name": "elasticsearch/elasticsearch", + "version": "v8.10.0", + "source": { + "type": "git", + "url": "git@github.com:elastic/elasticsearch-php.git", + "reference": "4237d7e125582cfa7560d97dd8c3baee622875f1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/elastic/elasticsearch-php/zipball/4237d7e125582cfa7560d97dd8c3baee622875f1", + "reference": "4237d7e125582cfa7560d97dd8c3baee622875f1", + "shasum": "" + }, + "require": { + "elastic/transport": "^8.7", + "guzzlehttp/guzzle": "^7.0", + "php": "^7.4 || ^8.0", + "psr/http-client": "^1.0", + "psr/http-message": "^1.1 || ^2.0", + "psr/log": "^1|^2|^3" + }, + "require-dev": { + "ext-yaml": "*", + "ext-zip": "*", + "mockery/mockery": "^1.5", + "nyholm/psr7": "^1.5", + "php-http/message-factory": "^1.1", + "php-http/mock-client": "^1.5", + "phpstan/phpstan": "^1.4", + "phpunit/phpunit": "^9.5", + "symfony/finder": "~4.0", + "symfony/http-client": "^5.0|^6.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Elastic\\Elasticsearch\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "PHP Client for Elasticsearch", + "keywords": [ + "client", + "elastic", + "elasticsearch", + "search" + ], + "time": "2023-09-28T10:38:34+00:00" + }, { "name": "graham-campbell/result-type", "version": "v1.1.1", @@ -451,6 +555,193 @@ ], "time": "2022-10-12T11:40:33+00:00" }, + { + "name": 
"php-http/discovery", + "version": "1.19.1", + "source": { + "type": "git", + "url": "https://github.com/php-http/discovery.git", + "reference": "57f3de01d32085fea20865f9b16fb0e69347c39e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-http/discovery/zipball/57f3de01d32085fea20865f9b16fb0e69347c39e", + "reference": "57f3de01d32085fea20865f9b16fb0e69347c39e", + "shasum": "" + }, + "require": { + "composer-plugin-api": "^1.0|^2.0", + "php": "^7.1 || ^8.0" + }, + "conflict": { + "nyholm/psr7": "<1.0", + "zendframework/zend-diactoros": "*" + }, + "provide": { + "php-http/async-client-implementation": "*", + "php-http/client-implementation": "*", + "psr/http-client-implementation": "*", + "psr/http-factory-implementation": "*", + "psr/http-message-implementation": "*" + }, + "require-dev": { + "composer/composer": "^1.0.2|^2.0", + "graham-campbell/phpspec-skip-example-extension": "^5.0", + "php-http/httplug": "^1.0 || ^2.0", + "php-http/message-factory": "^1.0", + "phpspec/phpspec": "^5.1 || ^6.1 || ^7.3", + "symfony/phpunit-bridge": "^6.2" + }, + "type": "composer-plugin", + "extra": { + "class": "Http\\Discovery\\Composer\\Plugin", + "plugin-optional": true + }, + "autoload": { + "psr-4": { + "Http\\Discovery\\": "src/" + }, + "exclude-from-classmap": [ + "src/Composer/Plugin.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Márk Sági-Kazár", + "email": "mark.sagikazar@gmail.com" + } + ], + "description": "Finds and installs PSR-7, PSR-17, PSR-18 and HTTPlug implementations", + "homepage": "http://php-http.org", + "keywords": [ + "adapter", + "client", + "discovery", + "factory", + "http", + "message", + "psr17", + "psr7" + ], + "support": { + "issues": "https://github.com/php-http/discovery/issues", + "source": "https://github.com/php-http/discovery/tree/1.19.1" + }, + "time": "2023-07-11T07:02:26+00:00" + }, + { + "name": "php-http/httplug", + "version": 
"2.4.0", + "source": { + "type": "git", + "url": "https://github.com/php-http/httplug.git", + "reference": "625ad742c360c8ac580fcc647a1541d29e257f67" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-http/httplug/zipball/625ad742c360c8ac580fcc647a1541d29e257f67", + "reference": "625ad742c360c8ac580fcc647a1541d29e257f67", + "shasum": "" + }, + "require": { + "php": "^7.1 || ^8.0", + "php-http/promise": "^1.1", + "psr/http-client": "^1.0", + "psr/http-message": "^1.0 || ^2.0" + }, + "require-dev": { + "friends-of-phpspec/phpspec-code-coverage": "^4.1 || ^5.0 || ^6.0", + "phpspec/phpspec": "^5.1 || ^6.0 || ^7.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Http\\Client\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Eric GELOEN", + "email": "geloen.eric@gmail.com" + }, + { + "name": "Márk Sági-Kazár", + "email": "mark.sagikazar@gmail.com", + "homepage": "https://sagikazarmark.hu" + } + ], + "description": "HTTPlug, the HTTP client abstraction for PHP", + "homepage": "http://httplug.io", + "keywords": [ + "client", + "http" + ], + "support": { + "issues": "https://github.com/php-http/httplug/issues", + "source": "https://github.com/php-http/httplug/tree/2.4.0" + }, + "time": "2023-04-14T15:10:03+00:00" + }, + { + "name": "php-http/promise", + "version": "1.2.0", + "source": { + "type": "git", + "url": "https://github.com/php-http/promise.git", + "reference": "ef4905bfb492ff389eb7f12e26925a0f20073050" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-http/promise/zipball/ef4905bfb492ff389eb7f12e26925a0f20073050", + "reference": "ef4905bfb492ff389eb7f12e26925a0f20073050", + "shasum": "" + }, + "require": { + "php": "^7.1 || ^8.0" + }, + "require-dev": { + "friends-of-phpspec/phpspec-code-coverage": "^4.3.2 || ^6.3", + "phpspec/phpspec": "^5.1.2 || ^6.2 || ^7.4" + }, + "type": "library", + "autoload": { + "psr-4": { + 
"Http\\Promise\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Joel Wurtz", + "email": "joel.wurtz@gmail.com" + }, + { + "name": "Márk Sági-Kazár", + "email": "mark.sagikazar@gmail.com" + } + ], + "description": "Promise used for asynchronous HTTP requests", + "homepage": "http://httplug.io", + "keywords": [ + "promise" + ], + "support": { + "issues": "https://github.com/php-http/promise/issues", + "source": "https://github.com/php-http/promise/tree/1.2.0" + }, + "time": "2023-10-24T09:20:26+00:00" + }, { "name": "phpoption/phpoption", "version": "1.9.1", @@ -789,6 +1080,56 @@ }, "time": "2023-04-04T09:54:51+00:00" }, + { + "name": "psr/log", + "version": "3.0.0", + "source": { + "type": "git", + "url": "https://github.com/php-fig/log.git", + "reference": "fe5ea303b0887d5caefd3d431c3e61ad47037001" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/log/zipball/fe5ea303b0887d5caefd3d431c3e61ad47037001", + "reference": "fe5ea303b0887d5caefd3d431c3e61ad47037001", + "shasum": "" + }, + "require": { + "php": ">=8.0.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Log\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "https://www.php-fig.org/" + } + ], + "description": "Common interface for logging libraries", + "homepage": "https://github.com/php-fig/log", + "keywords": [ + "log", + "psr", + "psr-3" + ], + "support": { + "source": "https://github.com/php-fig/log/tree/3.0.0" + }, + "time": "2021-07-14T16:46:02+00:00" + }, { "name": "ralouphie/getallheaders", "version": "3.0.3", diff --git a/env.example b/env.example index dcad2c1..c524fdd 100644 --- a/env.example +++ b/env.example @@ -2,6 +2,10 @@ SOLR_HOST=solr SOLR_PORT=8983 SOLR_CORE=dev 
+ELASTIC_HOST=127.0.0.1:9200 +ELASTIC_API_KEY=yourapikeyhere +ELASTIC_INDEX=nameofindex + CONTEXT_TOKEN_COUNT=1000 REPLICATE_API_KEY=yourapikeyhere diff --git a/examples/chicago-prompt.php b/examples/chicago-prompt.php index 1c00b1a..0e0fba0 100644 --- a/examples/chicago-prompt.php +++ b/examples/chicago-prompt.php @@ -1,9 +1,5 @@ query($prompt, $documents)) { echo 'GOT RESPONSE' . PHP_EOL; - echo $response . PHP_EOL; - - var_dump($response); + echo trim($response) . PHP_EOL; } else { diff --git a/src/Backend/ElasticBackend.php b/src/Backend/ElasticBackend.php new file mode 100644 index 0000000..f118270 --- /dev/null +++ b/src/Backend/ElasticBackend.php @@ -0,0 +1,96 @@ +client = ClientBuilder::create() + ->setHosts([$elastic_host]) + ->setApiKey($elastic_api_key) + ->build(); + + $this->index = $elastic_index; + } + + /** + * Indexes a piece of content under the given id in the configured index. + * + * @param $id Document id sent as the 'id' of the index request + * @param $content Value stored in the document's 'content' field + * @return bool true when the request completes without throwing, false when an \Exception is caught + */ + public function indexContent($id, $content) + { + try { + $this->client->index([ + 'index' => $this->index, + 'id' => $id, + 'body' => [ + 'content' => $content + ] + ]); + return true; + } catch(\Exception $e) { + // Indexing failed; the exception is swallowed and reported to the caller as false + return false; + } + } + + public function clearIndex() + { + try { + $this->client->deleteByQuery([ + "index" => $this->index, + "body" => [ + "query" => [ + "match_all" => (object)[] + ] + ] + ]); + return true; + } catch(\Exception $e) { + // Delete-by-query failed; the exception is swallowed and reported to the caller as false + return false; + } + } + + public function search($prompt, $amount = 5) + { + try { + + $response = $this->client->search([ + 'index' => $this->index, + 'size' => $amount, + 'body' => [ + 'query' => [ + 'match' => [ + 'content' => [ + 'query' => $prompt, + 'fuzziness' => 'AUTO', + 'minimum_should_match' => '50%' + ], + ] + ] + ] + ]); + + return array_map(function($result) { return $result['_source']['content']; }, $response->asArray()['hits']['hits']); + + } catch(\Exception $e) { + // Searching failed; the exception is swallowed and an empty result list is returned + return []; + } + 
} + +} \ No newline at end of file