Skip to content
This repository has been archived by the owner on Dec 4, 2023. It is now read-only.

Check update and trigger update #121

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
# For Dev Registry: https://raw.githubusercontent.com/premAI-io/prem-registry/dev/manifests.json
PREM_REGISTRY_URL=https://raw.githubusercontent.com/premAI-io/prem-registry/main/manifests.json

# Prem Daemon
# ------------------------------------------------------------------------------------------
DEFAULT_PORT='8000'
PREMD_IMAGE=ghcr.io/premai-io/premd

# Sentry
# ------------------------------------------------------------------------------------------
SENTRY_DSN=https://[email protected]/4505244431941632
Expand Down
2 changes: 2 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
DEBUG: bool = os.getenv("DEBUG", False)
SECRET_KEY: Secret = Secret(os.getenv("SECRET_KEY", ""))
PROJECT_NAME: str = os.getenv("PROJECT_NAME", "Prem Daemon")
PREMD_IMAGE: str = os.getenv("PREMD_IMAGE", "ghcr.io/premai-io/premd")
DEFAULT_PORT: int = int(os.getenv("DEFAULT_PORT", "8000"))

# PROXY
# ------------------------------------------------------------------------------
Expand Down
17 changes: 17 additions & 0 deletions app/core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,23 @@

def create_start_app_handler(app: FastAPI):
def start_app() -> None:
# client = utils.get_docker_client()
# if utils.container_exists(container_name):
# container = client.containers.get(container_name)
# host_port = container.ports.get(f"{utils.DEFAULT_PORT}/tcp", [None])[0][
# "HostPort"
# ]
# if host_port != f"{utils.DEFAULT_PORT}":
# utils.check_host_port_availability(utils.DEFAULT_PORT)
# new_container = utils.create_new_container(
# utils.PREMD_IMAGE,
# "latest",
# new_container_name,
# container_name,
# utils.DEFAULT_PORT,
# )
# utils.update_and_remove_old_container(container_name)
# new_container.start()
for registry in config.PREM_REGISTRY_URL.strip().split():
utils.add_services_from_registry(registry)

Expand Down
128 changes: 128 additions & 0 deletions app/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import logging
import re
import subprocess
import time
import xml.etree.ElementTree as ET

import docker
import requests
import torch
from bs4 import BeautifulSoup
from packaging.version import parse as parse_version

from app.core import config

logger = logging.getLogger(__name__)

PREMD_IMAGE = config.PREMD_IMAGE
DEFAULT_PORT = config.DEFAULT_PORT
SERVICES = []
REGISTRIES = config.PREM_REGISTRY_URL.strip().split()
INTERFACES = [
Expand Down Expand Up @@ -198,6 +204,128 @@ def get_gpu_info():
return gpu_name, total_memory_value, used_memory_value, mem_percentage


def extract_labels_from_html_file(html_content, class_names):
soup = BeautifulSoup(html_content, "html.parser")
labels = soup.select(class_names)
return (label.get_text() for label in labels)


def find_maximum_label(labels):
pattern = re.compile(r"v\d+\.\d+\.\d+$")
return max(filter(pattern.match, labels), default=None, key=parse_version)


def get_premd_last_tag(owner, repository, package):
response = requests.get(
f"https://github.com/{owner}/{repository}/pkgs/container/{package}"
Copy link
Contributor

@Janaka-Steph Janaka-Steph Sep 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not calling the API instead?
Something like this, with an authentication token if necessary:

def get_premd_last_tag():
    try:
        url = f'https://api.github.com/orgs/premai-io/packages/container/premd/versions'
        headers = {
            'Accept': 'application/vnd.github+json'
        }
        response = requests.get(url, headers)
        if response.status_code == 200:
            data = response.json()
            if data:
                # Find the latest version by sorting by created_at in descending order
                latest_version = max(data, key=lambda x: x['created_at'])
                return latest_version['name']
            else:
                print("No versions found for the image.")
        else:
            logger.error(f"Failed to retrieve data from GitHub API. Status code: {response.status_code}")
            return None
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return None

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because we'd have to hardcode an auth token (this particular API requires authentication even to access public info)

)
try:
labels = extract_labels_from_html_file(
response.content, ".Label.mr-1.mb-2.text-normal"
)
except Exception as e:
logger.info(f"Unexpected error: {e}")
return "latest"
else:
return find_maximum_label(labels)


def get_local_docker_image_tags(owner, repository):
try:
client = get_docker_client()
image = client.images.get(f"ghcr.io/{owner}/{repository}")
return image.tags
except Exception as e:
logger.info(f"Unexpected error: {e}")
return []


def create_new_container(image_name, image_tag, new_container_name, old_container_name):
client = get_docker_client()
old_container = client.containers.get(old_container_name)

if is_gpu_available():
device_requests = [
docker.types.DeviceRequest(device_ids=["all"], capabilities=[["gpu"]])
]
else:
device_requests = []

volumes = {}
for mount in old_container.attrs["Mounts"]:
source = mount["Source"]
target = mount["Destination"]
mode = mount["Mode"]
volumes[source] = {"bind": target, "mode": mode}

current_ports = old_container.attrs["HostConfig"]["PortBindings"]
current_port = list(current_ports.items())[0]

logger.info(
f"Starting new container {new_container_name} with image {image_name}:{image_tag} at port {current_port[0]}"
)
new_container = client.containers.create(
image=f"{image_name}:{image_tag}",
name=new_container_name,
ports={current_port[0]: current_port[1]},
volumes=volumes,
environment=old_container.attrs["Config"]["Env"],
device_requests=device_requests,
network_mode=old_container.attrs["HostConfig"]["NetworkMode"],
detach=True,
)
return new_container


def update_and_remove_old_container(old_container_name):
client = get_docker_client()
logger.info(f"Stopping {old_container_name}")
old_container = client.containers.get(old_container_name)
old_container.stop()


def update_container():
new_container = create_new_container(
PREMD_IMAGE, "latest", "new_container", "premd"
)
update_and_remove_old_container("premd")
new_container.start()
new_container.rename("premd")


def check_host_port_availability(host_port, timeout=30):
start_time = time.time()
client = get_docker_client()

while True:
if time.time() - start_time > timeout:
return False

containers = client.containers.list()
port_used = any(
f"{host_port}/tcp" in container.ports
for container in containers
if container.status == "running"
)

if not port_used:
return True

time.sleep(1)


def container_exists(container_name):
try:
client = get_docker_client()
_ = client.containers.get(container_name)
return True
except docker.errors.NotFound:
return False
except docker.errors.APIError as e:
logging.error(f"Error checking container existence: {e}")
return False


cached_domain = None


Expand Down
49 changes: 49 additions & 0 deletions app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,55 @@ async def health():
return schemas.HealthResponse(status=True)


@router.get(
"/update-available/",
responses={
400: {
"model": schemas.ErrorResponse,
"description": "Failed to check update available.",
}
},
response_model=schemas.UpdateAvailableResponse,
)
async def update_available():
try:
owner = "premAI-io"
remote_image = f"{utils.PREMD_IMAGE}:{utils.get_premd_last_tag(owner, 'prem-daemon', 'premd')}"
local_tags = utils.get_local_docker_image_tags(owner.lower(), "premd")
return {
"remote_image": remote_image,
"local_images": local_tags,
"update": remote_image not in local_tags,
}
except Exception as error:
logger.error(error)
raise HTTPException(
status_code=400,
detail={"message": f"Failed to check update available {error}."},
) from error


@router.get(
"/update-daemon/",
responses={
400: {
"model": schemas.ErrorResponse,
"description": "Failed to update.",
}
},
response_model=schemas.UpdateAvailableResponse,
)
async def trigger_update():
try:
utils.update_container()
except Exception as error:
logger.error(error)
raise HTTPException(
status_code=400,
detail={"message": f"Failed to update {error}."},
) from error


@router.get("/interfaces/", response_model=list[schemas.InterfaceResponse])
async def interfaces():
return utils.get_interfaces()
Expand Down
6 changes: 6 additions & 0 deletions app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ class RunServiceInput(BaseModel):
id: str


class UpdateAvailableResponse(BaseModel):
remote_image: str
local_images: list[str]
update: bool


class ServiceInput(BaseModel):
id: str
name: str
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ torchvision==0.15.2
torchaudio==2.0.2
sentry-sdk==1.26.0
psutil==5.9.5
beautifulsoup4==4.12.2
packaging==23.1