diff --git a/Makefile b/Makefile
index 9596ee2e..a8ce70a3 100644
--- a/Makefile
+++ b/Makefile
@@ -40,6 +40,7 @@ container:
 		$(if $(filter docker,$(CONTAINER_ENGINE)),--net=host) \
 		-v ~/.config/gcloud:/home/evalbench/.config/gcloud \
 		-e GOOGLE_CLOUD_PROJECT=cloud-db-nl2sql \
+		-e MESOP_XSRF_CHECK=false \
 		--cap-add=SYS_PTRACE \
 		-p 3000:3000 \
 		-p 50051:50051 \
@@ -70,6 +71,12 @@ push:
 	$(CONTAINER_ENGINE) image tag evalbench:latest us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest
 	$(CONTAINER_ENGINE) push us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest
 
+# Tag the locally built evalbench image for the evalbench-dev registry (the image deploy-corprun uses) and push it.
+.PHONY: push-corprun
+push-corprun:
+	$(CONTAINER_ENGINE) image tag evalbench:latest us-central1-docker.pkg.dev/evalbench-dev/cr-images/eval_server:latest
+	$(CONTAINER_ENGINE) push us-central1-docker.pkg.dev/evalbench-dev/cr-images/eval_server:latest
+
 deploy:
 	gcloud container clusters get-credentials evalbench-directpath-cluster --zone us-central1-c --project cloud-db-nl2sql
 	kubectl apply -f evalbench_service/k8s/namespace.yaml
@@ -88,6 +95,24 @@ deploy-test:
 	kubectl apply -f evalbench_service/k8s/evalbench-test.yaml
 	kubectl apply -f evalbench_service/k8s/vertical-autoscale-test.yaml
 
+# Deploy to Cloud Run in evalbench-dev; CLOUD_RUN=True makes entrypoint.sh start only the gunicorn frontend.
+.PHONY: deploy-corprun
+deploy-corprun:
+	gcloud run deploy evalbench \
+		--project=evalbench-dev \
+		--region=us-central1 \
+		--image=us-central1-docker.pkg.dev/evalbench-dev/cr-images/eval_server:latest \
+		--port=3000 \
+		--memory=2Gi \
+		--service-account=crsvc-evalbench@evalbench-dev.iam.gserviceaccount.com \
+		--set-env-vars CLOUD_RUN=True,GOOGLE_CLOUD_PROJECT=evalbench-dev,MESOP_XSRF_CHECK=false \
+		--ingress=internal-and-cloud-load-balancing \
+		--network=cr-infra-vpc-network \
+		--subnet=cr-infra-subnetwork \
+		--vpc-egress=all-traffic \
+		--add-volume=name=session-files,type=cloud-storage,bucket=evalbench-sessions-cloud-db-nl2sql \
+		--add-volume-mount=volume=session-files,mount-path=/tmp_session_files
+
 undeploy:
 	gcloud container clusters get-credentials evalbench-directpath-cluster --zone us-central1-c --project cloud-db-nl2sql
 	kubectl delete -f evalbench_service/k8s/evalbench.yaml
diff --git a/cloudbuild.yaml b/cloudbuild.yaml
index 38fa7fac..10d71752 100644
--- a/cloudbuild.yaml
+++ b/cloudbuild.yaml
@@ -3,11 +3,13 @@ steps:
 - name: 'gcr.io/cloud-builders/docker'
   args: ['build', '-t', 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:$COMMIT_SHA', '-f', 'evalbench_service/Dockerfile', '.']
 - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:$COMMIT_SHA'
-  args: ['evalbench/run.sh']
+  dir: '/evalbench'
+  args: ['/evalbench/evalbench/run.sh']
   env:
   - 'EVAL_GCP_PROJECT_ID=${_VAR_PROJECT}'
   - 'EVAL_GCP_PROJECT_REGION=${_VAR_REGION}'
   - 'EVAL_CONFIG=${_VAR_EVAL_CONFIG}'
+  - 'UV_CACHE_DIR=/tmp/uv-cache'
 images:
 - 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:$COMMIT_SHA'
 substitutions:
diff --git a/evalbench_service/Dockerfile b/evalbench_service/Dockerfile
index d05cd049..d0fb5588 100644
--- a/evalbench_service/Dockerfile
+++ b/evalbench_service/Dockerfile
@@ -27,13 +27,19 @@ RUN mkdir -p /home/evalbench && \
 
 COPY . evalbench
 WORKDIR evalbench
-RUN uv sync
+RUN uv pip install --system --break-system-packages -r requirements.txt
+RUN uv pip install --system --break-system-packages ./viewer
+RUN uv pip install --system --break-system-packages .
 RUN ln -s /usr/bin/python3 /usr/bin/python
 RUN make proto -f ./Makefile
 RUN mkdir /tmp_session_files /tmp_sessions
 RUN cp /evalbench/evalbench_service/supervisord.conf /evalbench/supervisord.conf
+RUN cp /evalbench/evalbench_service/entrypoint.sh /evalbench/entrypoint.sh && chmod +x /evalbench/entrypoint.sh
+RUN chmod +x /evalbench/evalbench/run.sh
 RUN chown -R 65532:65532 /evalbench /tmp /tmp_session_files /tmp_sessions /home/evalbench
 
-CMD ["/usr/bin/supervisord", "-c", "/evalbench/supervisord.conf"]
+USER 65532
+WORKDIR /evalbench
+CMD ["/evalbench/entrypoint.sh"]
 
 EXPOSE 50051 3000
\ No newline at end of file
diff --git a/evalbench_service/entrypoint.sh b/evalbench_service/entrypoint.sh
new file mode 100644
index 00000000..56520d32
--- /dev/null
+++ b/evalbench_service/entrypoint.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# evalbench_service/entrypoint.sh
+#
+# Container entrypoint. When CLOUD_RUN is "True" only the gunicorn frontend is
+# started, bound to $PORT (default 3000); otherwise supervisord is started
+# with /evalbench/supervisord.conf.
+
+if [[ "$CLOUD_RUN" == "True" ]]; then
+    echo "Cloud Run detected. Starting only gunicorn frontend on port ${PORT:-3000}..."
+    # gunicorn must run from the viewer directory to find main:me; abort
+    # instead of starting from the wrong place if the directory is missing.
+    cd /evalbench/viewer || exit 1
+    exec gunicorn -w 4 -k gevent main:me --bind ":${PORT:-3000}" --forwarded-allow-ips="*"
+else
+    echo "Starting supervisord to manage multiple processes..."
+    exec /usr/bin/supervisord -c /evalbench/supervisord.conf
+fi
diff --git a/evalbench_service/supervisord.conf b/evalbench_service/supervisord.conf
index d2ff9890..0d18da06 100644
--- a/evalbench_service/supervisord.conf
+++ b/evalbench_service/supervisord.conf
@@ -6,7 +6,7 @@ logfile_backups=5
 
 
 [program:evalbench_server]
-command=uv run evalbench/eval_server.py
+command=python evalbench/eval_server.py
 directory=/evalbench
 autostart=true
 autorestart=true
@@ -16,7 +16,7 @@ stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0
 
 [program:evalbench_frontend]
-command=uv run gunicorn -w 4 -k gevent main:me --bind :3000
+command=gunicorn -w 4 -k gevent main:me --bind :3000 --forwarded-allow-ips="*"
 directory=/evalbench/viewer
 autostart=true
 autorestart=true
diff --git a/viewer/main.py b/viewer/main.py
index fc68452d..efad6003 100644
--- a/viewer/main.py
+++ b/viewer/main.py
@@ -6,6 +6,21 @@
 
 logging.basicConfig(level=logging.INFO)
 
+# Setting MESOP_XSRF_CHECK=false manually enables Mesop debug mode in order to
+# bypass the XSRF check (e.g. when running in a container behind a proxy).
+# Any other value, or an unset variable, leaves the check alone.
+if os.environ.get("MESOP_XSRF_CHECK") == "false":
+    try:
+        import mesop.runtime as mesop_runtime
+        mesop_runtime.enable_debug_mode()
+        logging.warning(
+            "MESOP_XSRF_CHECK=false: Mesop debug mode enabled to bypass the XSRF check"
+        )
+    except Exception:
+        # Best effort: keep starting up, but record the full traceback so the
+        # failure can be diagnosed from the logs.
+        logging.exception("Failed to enable debug mode")
+
 try:
     import dashboard
     import conversations
@@ -54,22 +69,8 @@ class State:
     conversation_index: int = 0
 
 
-@me.page(
-    path="/",
-    title="Evalbench",
-    stylesheets=[
-        "data:",
-        "data:text/css;charset=utf-8,"
-        ".mdc-tooltip__surface%20%7B%0A"
-        "%20%20max-height%3A%20none%20%21important%3B%0A"
-        "%20%20max-width%3A%20none%20%21important%3B%0A"
-        "%20%20white-space%3A%20pre-wrap%20%21important%3B%0A"
-        "%7D",
-    ],
-)
-def app():
-    state = me.state(State)
-
+def get_results_dir():
+    """Return the first candidate that is an existing directory, else the second candidate."""
     # Check multiple locations for results directory
     results_dir_candidates = [
         "/tmp_session_files/results",
@@ -77,14 +78,47 @@ def app():
         os.path.join(os.getcwd(), "results"),
     ]
 
-    results_dir = None
     for candidate in results_dir_candidates:
         if os.path.exists(candidate) and os.path.isdir(candidate):
-            results_dir = candidate
-            break
+            return candidate
+
+    return results_dir_candidates[1]  # Fallback to default
+
+
+def on_load(e: me.LoadEvent):
+    """Preselect the results directory named by the job_id (or jobid) query parameter."""
+    job_id = me.query_params.get("job_id") or me.query_params.get("jobid")
+    if not job_id:
+        return
+
+    results_dir = get_results_dir()
+    # The id comes from the URL, so it is only accepted when it matches the
+    # name of an existing sub-directory. Checking isdir first also covers a
+    # missing or non-directory results path, where os.listdir would raise.
+    if not os.path.isdir(results_dir):
+        return
+    if job_id in os.listdir(results_dir) and os.path.isdir(
+        os.path.join(results_dir, job_id)
+    ):
+        me.state(State).selected_directory = job_id
 
-    if results_dir is None:
-        results_dir = results_dir_candidates[1]  # Fallback to default
+
+@me.page(
+    path="/",
+    title="Evalbench",
+    on_load=on_load,
+    security_policy=me.SecurityPolicy(
+        dangerously_disable_trusted_types=True,
+        cross_origin_opener_policy="same-origin",
+    ),
+    stylesheets=[
+        "data:",
+        "/static/custom.css",
+    ],
+)
+def app():
+    state = me.state(State)
+    results_dir = get_results_dir()
 
     directories = []
     if os.path.exists(results_dir):
diff --git a/viewer/static/custom.css b/viewer/static/custom.css
new file mode 100644
index 00000000..9fff3588
--- /dev/null
+++ b/viewer/static/custom.css
@@ -0,0 +1,5 @@
+.mdc-tooltip__surface {
+  max-height: none !important;
+  max-width: none !important;
+  white-space: pre-wrap !important;
+}