Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 50 additions & 7 deletions docker/sidecar/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
MAIN_PROCESS_NAME = os.getenv("MAIN_PROCESS_NAME", "")
# Version from build arg (set via Dockerfile ARG -> ENV)
VERSION = os.getenv("VERSION", "0.0.0-dev")
# Network isolation mode - when true, disables network-dependent features (e.g., Go module proxy).
# Read from the NETWORK_ISOLATED environment variable: "true", "1", or "yes"
# (compared case-insensitively) enable it; any other value, or an unset
# variable (which defaults to "false"), leaves it disabled.
NETWORK_ISOLATED = os.getenv("NETWORK_ISOLATED", "false").lower() in ("true", "1", "yes")


class ExecuteRequest(BaseModel):
Expand Down Expand Up @@ -186,6 +188,37 @@ def get_container_env(pid: int) -> dict[str, str]:
return {}


def apply_network_isolation_overrides(env: dict[str, str], language: str) -> dict[str, str]:
    """Adjust the execution environment for offline use when isolation is on.

    If the module-level ``NETWORK_ISOLATED`` flag is false, the environment is
    handed back untouched. Otherwise language-specific variables are written
    into ``env`` so that the toolchain does not attempt network access.
    Currently only Go is handled: ``GOPROXY`` and ``GOSUMDB`` are both set to
    ``"off"`` and a log line is printed (flushed immediately).

    Args:
        env: Container environment mapping. It is mutated in place.
        language: Identifier of the language being executed (e.g. ``"go"``).

    Returns:
        The same ``env`` object that was passed in, after any overrides.
    """
    # Go: turn off the module proxy and the checksum database so the toolchain
    # never tries to reach them from an egress-blocked pod.
    if NETWORK_ISOLATED and language == "go":
        env.update({"GOPROXY": "off", "GOSUMDB": "off"})
        print("[EXECUTE] Network isolation: overriding GOPROXY=off, GOSUMDB=off", flush=True)

    # Future: Add overrides for other languages as needed
    # - Rust: CARGO_NET_OFFLINE=true
    # - npm/Node: npm_config_offline=true
    # - pip/Python: PIP_NO_INDEX=1

    return env


def get_language_command(
language: str, code: str, working_dir: str, container_env: dict[str, str]
) -> tuple[list[str], Path | None]:
Expand Down Expand Up @@ -284,6 +317,9 @@ async def execute_via_nsenter(request: ExecuteRequest) -> ExecuteResponse:
# eliminating config drift between Dockerfiles and sidecar code
container_env = get_container_env(main_pid)

# Apply network isolation overrides if enabled
container_env = apply_network_isolation_overrides(container_env, LANGUAGE)

# Get the command for this language (this writes code to a temp file)
cmd, temp_file = get_language_command(
LANGUAGE, request.code, request.working_dir, container_env
Expand Down Expand Up @@ -316,25 +352,30 @@ async def execute_via_nsenter(request: ExecuteRequest) -> ExecuteResponse:
"--",
] + cmd

# Debug logging
print(f"[EXECUTE] main_pid={main_pid}, language={LANGUAGE}")
print(f"[EXECUTE] container_env PATH={container_env.get('PATH', 'NOT SET')}")
print(f"[EXECUTE] nsenter_cmd={nsenter_cmd}")
# Debug logging - use flush=True to ensure output before container termination
print(f"[EXECUTE] main_pid={main_pid}, language={LANGUAGE}", flush=True)
print(f"[EXECUTE] container_env PATH={container_env.get('PATH', 'NOT SET')}", flush=True)
print(f"[EXECUTE] nsenter_cmd={nsenter_cmd}", flush=True)
if temp_file:
print(f"[EXECUTE] code_file={temp_file}, exists={temp_file.exists()}, size={temp_file.stat().st_size if temp_file.exists() else 0}", flush=True)

try:
print(f"[EXECUTE] Creating subprocess...", flush=True)
proc = await asyncio.create_subprocess_exec(
*nsenter_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=request.working_dir,
)
print(f"[EXECUTE] Subprocess created, pid={proc.pid}, waiting for completion (timeout={request.timeout}s)...", flush=True)

try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(),
timeout=request.timeout,
)
except TimeoutError:
print(f"[EXECUTE] TIMEOUT after {request.timeout}s, killing process pid={proc.pid}", flush=True)
proc.kill()
await proc.wait()
return ExecuteResponse(
Expand All @@ -350,11 +391,11 @@ async def execute_via_nsenter(request: ExecuteRequest) -> ExecuteResponse:
stderr_str = stderr.decode("utf-8", errors="replace")[:MAX_OUTPUT_SIZE]

# Debug logging
print(f"[EXECUTE] exit_code={proc.returncode}, stdout_len={len(stdout_str)}, stderr_len={len(stderr_str)}")
print(f"[EXECUTE] exit_code={proc.returncode}, stdout_len={len(stdout_str)}, stderr_len={len(stderr_str)}", flush=True)
if stdout_str:
print(f"[EXECUTE] stdout preview: {stdout_str[:500]!r}")
print(f"[EXECUTE] stdout preview: {stdout_str[:500]!r}", flush=True)
if stderr_str:
print(f"[EXECUTE] stderr preview: {stderr_str[:500]!r}")
print(f"[EXECUTE] stderr preview: {stderr_str[:500]!r}", flush=True)

return ExecuteResponse(
exit_code=proc.returncode or 0,
Expand All @@ -364,6 +405,8 @@ async def execute_via_nsenter(request: ExecuteRequest) -> ExecuteResponse:
)

except Exception as e:
print(f"[EXECUTE] EXCEPTION: {type(e).__name__}: {e}", flush=True)
print(f"[EXECUTE] Traceback: {traceback.format_exc()}", flush=True)
return ExecuteResponse(
exit_code=1,
stdout="",
Expand Down
139 changes: 139 additions & 0 deletions helm-deployments/kubecoderun/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,145 @@ data:
{{- $defaultImage = printf "%s-%s:%s" $registry "d" $defaultTag }}
LANG_IMAGE_D: {{ .Values.execution.languages.d.image | default $defaultImage | quote }}

# Per-language resource limits (falls back to sidecar defaults)
# These control user code execution resources (code runs in sidecar's cgroup)
{{- $defaultCpuLimit := .Values.execution.sidecar.resources.limits.cpu }}
{{- $defaultMemoryLimit := .Values.execution.sidecar.resources.limits.memory }}
{{- $defaultCpuRequest := .Values.execution.sidecar.resources.requests.cpu }}
{{- $defaultMemoryRequest := .Values.execution.sidecar.resources.requests.memory }}
{{- with .Values.execution.languages.python.resources }}
LANG_CPU_LIMIT_PY: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_PY: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_PY: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_PY: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_PY: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_PY: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_PY: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_PY: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.javascript.resources }}
LANG_CPU_LIMIT_JS: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_JS: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_JS: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_JS: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_JS: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_JS: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_JS: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_JS: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.typescript.resources }}
LANG_CPU_LIMIT_TS: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_TS: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_TS: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_TS: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_TS: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_TS: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_TS: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_TS: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.go.resources }}
LANG_CPU_LIMIT_GO: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_GO: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_GO: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_GO: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_GO: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_GO: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_GO: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_GO: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.java.resources }}
LANG_CPU_LIMIT_JAVA: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_JAVA: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_JAVA: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_JAVA: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_JAVA: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_JAVA: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_JAVA: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_JAVA: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.rust.resources }}
LANG_CPU_LIMIT_RS: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_RS: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_RS: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_RS: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_RS: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_RS: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_RS: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_RS: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.c.resources }}
LANG_CPU_LIMIT_C: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_C: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_C: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_C: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_C: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_C: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_C: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_C: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.cpp.resources }}
LANG_CPU_LIMIT_CPP: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_CPP: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_CPP: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_CPP: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_CPP: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_CPP: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_CPP: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_CPP: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.php.resources }}
LANG_CPU_LIMIT_PHP: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_PHP: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_PHP: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_PHP: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_PHP: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_PHP: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_PHP: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_PHP: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.r.resources }}
LANG_CPU_LIMIT_R: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_R: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_R: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_R: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_R: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_R: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_R: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_R: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.fortran.resources }}
LANG_CPU_LIMIT_F90: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_F90: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_F90: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_F90: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_F90: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_F90: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_F90: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_F90: {{ $defaultMemoryRequest | quote }}
{{- end }}
{{- with .Values.execution.languages.d.resources }}
LANG_CPU_LIMIT_D: {{ .limits.cpu | default $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_D: {{ .limits.memory | default $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_D: {{ .requests.cpu | default $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_D: {{ .requests.memory | default $defaultMemoryRequest | quote }}
{{- else }}
LANG_CPU_LIMIT_D: {{ $defaultCpuLimit | quote }}
LANG_MEMORY_LIMIT_D: {{ $defaultMemoryLimit | quote }}
LANG_CPU_REQUEST_D: {{ $defaultCpuRequest | quote }}
LANG_MEMORY_REQUEST_D: {{ $defaultMemoryRequest | quote }}
{{- end }}

# Execution Limits
MAX_EXECUTION_TIME: {{ .Values.execution.maxExecutionTime | quote }}
MAX_MEMORY_MB: {{ .Values.resourceLimits.maxMemoryMb | quote }}
Expand Down
11 changes: 11 additions & 0 deletions helm-deployments/kubecoderun/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,15 +210,26 @@ execution:
# poolSize = 0: use Jobs (cold start)
# Images default to {imageRegistry}-{language}:{imageTag or appVersion}
# Set image: to override with a custom image for a specific language
# Set resources: to override CPU/memory limits for specific languages
# (falls back to sidecar.resources if not specified)
languages:
python:
poolSize: 5
# resources: (uses sidecar.resources defaults)
javascript:
poolSize: 2
typescript:
poolSize: 0
go:
poolSize: 0
# Go compilation is CPU-intensive; consider increasing resources:
# resources:
# limits:
# cpu: "2"
# memory: "1Gi"
# requests:
# cpu: "500m"
# memory: "512Mi"
java:
poolSize: 0
rust:
Expand Down
43 changes: 20 additions & 23 deletions src/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,8 @@ def get_pool_configs(self):
from ..services.kubernetes.models import PoolConfig

configs = []
languages = ["py", "js", "ts", "go", "java", "c", "cpp", "php", "rs", "r", "f90", "d"]

pool_sizes = {
"py": self.pod_pool_py,
"js": self.pod_pool_js,
Expand All @@ -594,26 +596,20 @@ def get_pool_configs(self):
"d": self.pod_pool_d,
}

# Per-language image overrides from environment (LANG_IMAGE_<LANG>)
# Falls back to auto-generated registry/tag pattern if not set
image_overrides = {
"py": os.getenv("LANG_IMAGE_PY"),
"js": os.getenv("LANG_IMAGE_JS"),
"ts": os.getenv("LANG_IMAGE_TS"),
"go": os.getenv("LANG_IMAGE_GO"),
"java": os.getenv("LANG_IMAGE_JAVA"),
"c": os.getenv("LANG_IMAGE_C"),
"cpp": os.getenv("LANG_IMAGE_CPP"),
"php": os.getenv("LANG_IMAGE_PHP"),
"rs": os.getenv("LANG_IMAGE_RS"),
"r": os.getenv("LANG_IMAGE_R"),
"f90": os.getenv("LANG_IMAGE_F90"),
"d": os.getenv("LANG_IMAGE_D"),
}
for lang in languages:
lang_upper = lang.upper()
pool_size = pool_sizes[lang]

# Per-language image override (LANG_IMAGE_<LANG>)
image = os.getenv(f"LANG_IMAGE_{lang_upper}") or self.kubernetes.get_image_for_language(lang)

# Per-language resource limits (LANG_CPU_LIMIT_<LANG>, etc.)
# Falls back to global sidecar defaults
sidecar_cpu_limit = os.getenv(f"LANG_CPU_LIMIT_{lang_upper}") or self.k8s_sidecar_cpu_limit
sidecar_memory_limit = os.getenv(f"LANG_MEMORY_LIMIT_{lang_upper}") or self.k8s_sidecar_memory_limit
sidecar_cpu_request = os.getenv(f"LANG_CPU_REQUEST_{lang_upper}") or self.k8s_sidecar_cpu_request
sidecar_memory_request = os.getenv(f"LANG_MEMORY_REQUEST_{lang_upper}") or self.k8s_sidecar_memory_request

for lang, pool_size in pool_sizes.items():
# Use explicit image override if set, otherwise auto-generate
image = image_overrides.get(lang) or self.kubernetes.get_image_for_language(lang)
configs.append(
PoolConfig(
language=lang,
Expand All @@ -622,12 +618,13 @@ def get_pool_configs(self):
sidecar_image=self.k8s_sidecar_image,
cpu_limit=self.k8s_cpu_limit,
memory_limit=self.k8s_memory_limit,
sidecar_cpu_limit=self.k8s_sidecar_cpu_limit,
sidecar_memory_limit=self.k8s_sidecar_memory_limit,
sidecar_cpu_request=self.k8s_sidecar_cpu_request,
sidecar_memory_request=self.k8s_sidecar_memory_request,
sidecar_cpu_limit=sidecar_cpu_limit,
sidecar_memory_limit=sidecar_memory_limit,
sidecar_cpu_request=sidecar_cpu_request,
sidecar_memory_request=sidecar_memory_request,
image_pull_policy=self.k8s_image_pull_policy,
seccomp_profile_type=self.k8s_seccomp_profile_type,
network_isolated=self.enable_network_isolation,
)
)

Expand Down
1 change: 1 addition & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ async def lifespan(app: FastAPI):
default_cpu_request=settings.k8s_cpu_request,
default_memory_request=settings.k8s_memory_request,
seccomp_profile_type=settings.k8s_seccomp_profile_type,
network_isolated=settings.enable_network_isolation,
)

await kubernetes_manager.start()
Expand Down
2 changes: 2 additions & 0 deletions src/services/kubernetes/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def create_pod_manifest(
sidecar_cpu_request: str = "100m",
sidecar_memory_request: str = "256Mi",
seccomp_profile_type: str = "RuntimeDefault",
network_isolated: bool = False,
) -> client.V1Pod:
"""Create a Pod manifest for code execution.

Expand Down Expand Up @@ -304,6 +305,7 @@ def create_pod_manifest(
client.V1EnvVar(name="LANGUAGE", value=language),
client.V1EnvVar(name="WORKING_DIR", value="/mnt/data"),
client.V1EnvVar(name="SIDECAR_PORT", value=str(sidecar_port)),
client.V1EnvVar(name="NETWORK_ISOLATED", value=str(network_isolated).lower()),
],
readiness_probe=client.V1Probe(
http_get=client.V1HTTPGetAction(path="/ready", port=sidecar_port),
Expand Down
1 change: 1 addition & 0 deletions src/services/kubernetes/job_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ async def create_job(
sidecar_cpu_request=spec.sidecar_cpu_request,
sidecar_memory_request=spec.sidecar_memory_request,
seccomp_profile_type=spec.seccomp_profile_type,
network_isolated=spec.network_isolated,
ttl_seconds_after_finished=self.ttl_seconds_after_finished,
active_deadline_seconds=self.active_deadline_seconds,
)
Expand Down
Loading