Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ container:
$(if $(filter docker,$(CONTAINER_ENGINE)),--net=host) \
-v ~/.config/gcloud:/home/evalbench/.config/gcloud \
-e GOOGLE_CLOUD_PROJECT=cloud-db-nl2sql \
-e MESOP_XSRF_CHECK=false \
--cap-add=SYS_PTRACE \
-p 3000:3000 \
-p 50051:50051 \
Expand Down Expand Up @@ -70,6 +71,10 @@ push:
$(CONTAINER_ENGINE) image tag evalbench:latest us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest
$(CONTAINER_ENGINE) push us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest

# Tag the evalbench:latest image for the evalbench-dev Artifact Registry
# repository (cr-images) and push it there.
# Declared phony because the target names a command, not a file to build.
.PHONY: push-corprun
push-corprun:
	$(CONTAINER_ENGINE) image tag evalbench:latest us-central1-docker.pkg.dev/evalbench-dev/cr-images/eval_server:latest
	$(CONTAINER_ENGINE) push us-central1-docker.pkg.dev/evalbench-dev/cr-images/eval_server:latest

deploy:
gcloud container clusters get-credentials evalbench-directpath-cluster --zone us-central1-c --project cloud-db-nl2sql
kubectl apply -f evalbench_service/k8s/namespace.yaml
Expand All @@ -88,6 +93,22 @@ deploy-test:
kubectl apply -f evalbench_service/k8s/evalbench-test.yaml
kubectl apply -f evalbench_service/k8s/vertical-autoscale-test.yaml

# Deploy the eval_server image from the evalbench-dev registry to the
# "evalbench" Cloud Run service in us-central1.
#
# - CLOUD_RUN=True is the value evalbench_service/entrypoint.sh tests for to
#   start only the gunicorn frontend instead of supervisord.
# - The session-files Cloud Storage volume is mounted at /tmp_session_files.
# - NOTE(review): MESOP_XSRF_CHECK=false makes viewer/main.py enable Mesop
#   debug mode; confirm that is acceptable for this deployment.
# - NOTE(review): the bucket name ends in "cloud-db-nl2sql" while the service
#   is deployed to project evalbench-dev; confirm this is the intended bucket.
#
# Declared phony because the target names a command, not a file to build.
.PHONY: deploy-corprun
deploy-corprun:
	gcloud run deploy evalbench \
		--project=evalbench-dev \
		--region=us-central1 \
		--image=us-central1-docker.pkg.dev/evalbench-dev/cr-images/eval_server:latest \
		--port=3000 \
		--memory=2Gi \
		--service-account=crsvc-evalbench@evalbench-dev.iam.gserviceaccount.com \
		--set-env-vars CLOUD_RUN=True,GOOGLE_CLOUD_PROJECT=evalbench-dev,MESOP_XSRF_CHECK=false \
		--ingress=internal-and-cloud-load-balancing \
		--network=cr-infra-vpc-network \
		--subnet=cr-infra-subnetwork \
		--vpc-egress=all-traffic \
		--add-volume=name=session-files,type=cloud-storage,bucket=evalbench-sessions-cloud-db-nl2sql \
		--add-volume-mount=volume=session-files,mount-path=/tmp_session_files

undeploy:
gcloud container clusters get-credentials evalbench-directpath-cluster --zone us-central1-c --project cloud-db-nl2sql
kubectl delete -f evalbench_service/k8s/evalbench.yaml
Expand Down
4 changes: 3 additions & 1 deletion cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ steps:
- name: 'gcr.io/cloud-builders/docker'
args: ['build', '-t', 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:$COMMIT_SHA', '-f', 'evalbench_service/Dockerfile', '.']
- name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:$COMMIT_SHA'
args: ['evalbench/run.sh']
dir: '/evalbench'
args: ['/evalbench/evalbench/run.sh']
env:
- 'EVAL_GCP_PROJECT_ID=${_VAR_PROJECT}'
- 'EVAL_GCP_PROJECT_REGION=${_VAR_REGION}'
- 'EVAL_CONFIG=${_VAR_EVAL_CONFIG}'
- 'UV_CACHE_DIR=/tmp/uv-cache'
images:
- 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:$COMMIT_SHA'
substitutions:
Expand Down
10 changes: 8 additions & 2 deletions evalbench_service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,19 @@ RUN mkdir -p /home/evalbench && \

COPY . evalbench
WORKDIR evalbench
RUN uv sync
RUN uv pip install --system --break-system-packages -r requirements.txt
RUN uv pip install --system --break-system-packages ./viewer
RUN uv pip install --system --break-system-packages .

RUN ln -s /usr/bin/python3 /usr/bin/python
RUN make proto -f ./Makefile
RUN mkdir /tmp_session_files /tmp_sessions
RUN cp /evalbench/evalbench_service/supervisord.conf /evalbench/supervisord.conf
RUN cp /evalbench/evalbench_service/entrypoint.sh /evalbench/entrypoint.sh && chmod +x /evalbench/entrypoint.sh
RUN chmod +x /evalbench/evalbench/run.sh
RUN chown -R 65532:65532 /evalbench /tmp /tmp_session_files /tmp_sessions /home/evalbench

CMD ["/usr/bin/supervisord", "-c", "/evalbench/supervisord.conf"]
USER 65532
WORKDIR /evalbench
CMD ["/evalbench/entrypoint.sh"]
EXPOSE 50051 3000
12 changes: 12 additions & 0 deletions evalbench_service/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# evalbench_service/entrypoint.sh
#
# Container entrypoint. When CLOUD_RUN is exactly "True", run only the
# gunicorn frontend on $PORT (default 3000); otherwise hand control to
# supervisord, which manages the processes listed in supervisord.conf.

# Abort on any failed command or unset variable, so a failed `cd` below
# cannot leave gunicorn starting from the wrong directory.
set -euo pipefail

if [[ "${CLOUD_RUN:-}" == "True" ]]; then
    port="${PORT:-3000}"
    echo "Cloud Run detected. Starting only gunicorn frontend on port ${port}..."
    # Ensure we are in the viewer directory for gunicorn to find main:me
    cd /evalbench/viewer
    exec gunicorn -w 4 -k gevent main:me --bind ":${port}" --forwarded-allow-ips="*"
else
    echo "Starting supervisord to manage multiple processes..."
    exec /usr/bin/supervisord -c /evalbench/supervisord.conf
fi
4 changes: 2 additions & 2 deletions evalbench_service/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ logfile_backups=5


[program:evalbench_server]
command=uv run evalbench/eval_server.py
command=python evalbench/eval_server.py
directory=/evalbench
autostart=true
autorestart=true
Expand All @@ -16,7 +16,7 @@ stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0

[program:evalbench_frontend]
command=uv run gunicorn -w 4 -k gevent main:me --bind :3000
command=gunicorn -w 4 -k gevent main:me --bind :3000 --forwarded-allow-ips="*"
directory=/evalbench/viewer
autostart=true
autorestart=true
Expand Down
68 changes: 47 additions & 21 deletions viewer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@

logging.basicConfig(level=logging.INFO)

# Opt-in switch: when MESOP_XSRF_CHECK is exactly "false", put the Mesop
# runtime into debug mode, which is used here to bypass the XSRF check
# (e.g. when running in a container behind a proxy).
# Any failure is logged and otherwise ignored so the app still starts.
if os.getenv("MESOP_XSRF_CHECK") == "false":
    try:
        import mesop.runtime as mesop_runtime
    except Exception as exc:
        logging.error("Failed to enable debug mode: %s", exc)
    else:
        try:
            mesop_runtime.enable_debug_mode()
        except Exception as exc:
            logging.error("Failed to enable debug mode: %s", exc)

try:
import dashboard
import conversations
Expand Down Expand Up @@ -54,37 +63,54 @@ class State:
conversation_index: int = 0


@me.page(
path="/",
title="Evalbench",
stylesheets=[
"data:",
"data:text/css;charset=utf-8,"
".mdc-tooltip__surface%20%7B%0A"
"%20%20max-height%3A%20none%20%21important%3B%0A"
"%20%20max-width%3A%20none%20%21important%3B%0A"
"%20%20white-space%3A%20pre-wrap%20%21important%3B%0A"
"%7D",
],
)
def app():
state = me.state(State)

def get_results_dir():
# Check multiple locations for results directory
results_dir_candidates = [
"/tmp_session_files/results",
os.path.join(os.path.dirname(os.path.dirname(__file__)), "results"),
os.path.join(os.getcwd(), "results"),
]

results_dir = None
for candidate in results_dir_candidates:
if os.path.exists(candidate) and os.path.isdir(candidate):
results_dir = candidate
break
return candidate

return results_dir_candidates[1] # Fallback to default


def on_load(e: me.LoadEvent):
    """Page-load handler that preselects a results directory from the URL.

    Reads the ``job_id`` query parameter (falling back to ``jobid``) and, if
    it names a sub-directory of the results directory, stores it in
    ``State.selected_directory``. Anything else leaves the state untouched.
    """
    page_state = me.state(State)
    root = get_results_dir()

    # Collect the names of immediate sub-directories only; plain files are skipped.
    available = []
    if os.path.exists(root):
        for entry in os.listdir(root):
            if os.path.isdir(os.path.join(root, entry)):
                available.append(entry)

    requested = me.query_params.get("job_id") or me.query_params.get("jobid")
    # Only accept values that match an existing directory name.
    if requested and requested in available:
        page_state.selected_directory = requested

if results_dir is None:
results_dir = results_dir_candidates[1] # Fallback to default

@me.page(
path="/",
title="Evalbench",
on_load=on_load,
security_policy=me.SecurityPolicy(
dangerously_disable_trusted_types=True,
cross_origin_opener_policy="same-origin",
),
stylesheets=[
"data:",
"/static/custom.css",
],
)
def app():
state = me.state(State)
results_dir = get_results_dir()

directories = []
if os.path.exists(results_dir):
Expand Down
5 changes: 5 additions & 0 deletions viewer/static/custom.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* Tooltip surface override: remove the max-height/max-width caps and use
   pre-wrap so whitespace and line breaks in the tooltip text are preserved.
   !important is used on every property so these values win over other rules. */
.mdc-tooltip__surface {
max-height: none !important;
max-width: none !important;
white-space: pre-wrap !important;
}
Loading