Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add model based hybrid search optimizer #140

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions middleware/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
FROM python:3.8-slim-buster
FROM python:3.12.6

WORKDIR /python-docker

COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt
RUN pip install --upgrade pip && pip3 install -r requirements.txt

# We are mounting the local filesystem into Docker via the Docker Compose volumes for developer joy.
COPY . .
Expand Down
63 changes: 62 additions & 1 deletion middleware/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
import os
import uuid
import flask
import string
import pickle
import requests
from flask import Flask, request, Response
import numpy as np
from flask import Flask, request, Response, jsonify
from sklearn.ensemble import RandomForestRegressor
from flask_cors import CORS
from opensearchpy import OpenSearch
from opentelemetry import trace
Expand Down Expand Up @@ -282,6 +286,63 @@ def dump_cache():
response = flask.jsonify(cache=cache)
return response

# Load the pre-trained regressor once at import time so every request reuses it.
# The path is resolved next to this module instead of against the process's
# working directory, so the app also starts when launched from elsewhere.
# NOTE(security): pickle.load executes arbitrary code embedded in the file, so
# model.pkl must only ever come from a trusted source.
_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "model.pkl")
with open(_MODEL_PATH, "rb") as model_file:
    model = pickle.load(model_file)

@app.route("/get_neuralness", methods=["GET"])
def get_neuralness():
    """Return the neuralness value the model scores highest for a query.

    Reads the ``query`` URL parameter, builds one feature row per candidate
    neuralness value (0.0, 0.1, ..., 1.0) and asks the module-level ``model``
    to score them.

    Returns:
        A JSON response ``{"best_neuralness": <value>}`` on success.
        ``({"error": ...}, 400)`` when ``query`` is missing or empty.
        ``({"error": ...}, 500)`` when the model raises during prediction.
    """
    query = request.args.get("query")
    if not query:
        return jsonify({"error": "Query parameter is missing"}), 400

    # These features depend only on the query, not on the candidate, so they
    # are computed once instead of once per candidate.
    query_features = [
        num_of_terms(query),
        query_length(query),
        has_numbers(query),
        has_special_char(query),
    ]

    # i / 10 rather than i * 0.1: the multiplication yields values such as
    # 0.30000000000000004, which would leak into the JSON response.
    candidates = [i / 10 for i in range(11)]

    # Feature order must stay: neuralness first, then the query features.
    feature_rows = [[neuralness] + query_features for neuralness in candidates]

    # Score every candidate in a single predict call.
    try:
        predictions = model.predict(feature_rows)
    except Exception as e:
        return jsonify({"error": f"Model prediction failed: {str(e)}"}), 500

    # Keep the first candidate with the strictly highest prediction.
    max_prediction = float('-inf')
    best_neuralness = None
    for neuralness, prediction in zip(candidates, predictions):
        if prediction > max_prediction:
            max_prediction = prediction
            best_neuralness = neuralness

    print(f"Ran predictions for query {query}")
    return jsonify({"best_neuralness": best_neuralness})

def num_of_terms(query_string: str) -> int:
    """Return the number of pieces obtained by splitting on single spaces.

    The split is on the literal ``" "`` character, so leading, trailing or
    consecutive spaces produce empty pieces that are counted too, and an
    empty string counts as one term. This is kept as-is because the value is
    fed to the model as a feature.
    """
    terms = query_string.split(" ")
    return len(terms)

def query_length(query_string: str) -> int:
    """Return the length of the query in characters, whitespace included."""
    return len(query_string)

def has_numbers(query_string: str) -> int:
    """Return 1 if any character of the query is a digit, otherwise 0.

    The result is an int rather than a bool so it can be used directly as a
    numeric model feature.
    """
    return int(any(char.isdigit() for char in query_string))

def has_special_char(query_string: str) -> int:
    """Return 1 if the query contains an ASCII punctuation character, else 0.

    "Special" means membership in ``string.punctuation``; whitespace and
    non-ASCII characters are not counted. The result is an int so it can be
    used directly as a numeric model feature.
    """
    special_chars = string.punctuation
    return int(any(char in special_chars for char in query_string))

if __name__ == "__main__":
    # Development entry point: listens on all interfaces on port 9090.
    # NOTE(security): debug=True enables the interactive Werkzeug debugger,
    # which allows code execution from the browser; combined with
    # host="0.0.0.0" this must not be exposed outside a trusted dev network.
    app.run(host="0.0.0.0", port=9090, debug=True)
Binary file added middleware/model.pkl
Binary file not shown.
4 changes: 3 additions & 1 deletion middleware/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ opensearch-py==2.7.1
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-exporter-otlp-proto-http==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
scikit-learn==1.5.2
numpy==1.26.4
2 changes: 2 additions & 0 deletions opensearch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ RUN /usr/share/opensearch/bin/opensearch-plugin install --batch telemetry-otel

# Install the opensearch-ubi plugin.
RUN /usr/share/opensearch/bin/opensearch-plugin install --batch https://github.com/opensearch-project/user-behavior-insights/releases/download/2.18.0.2/opensearch-ubi-2.18.0.2.zip
ADD ./search-eval-plugin/search-quality-evaluation-plugin-2.18.0.0.zip /tmp/search-quality-evaluation-plugin.zip
RUN /usr/share/opensearch/bin/opensearch-plugin install --batch file:/tmp/search-quality-evaluation-plugin.zip
Binary file not shown.
4 changes: 3 additions & 1 deletion quickstart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ if $shutdown; then
exit
fi

docker compose up -d --build ${services}
docker compose build --no-cache ${services}

docker compose up -d ${services}

echo -e "${MAJOR}Waiting for OpenSearch to start up and be online.${RESET}"
./opensearch/wait-for-os.sh # Wait for OpenSearch to be online
Expand Down