diff --git a/.packit.yaml b/.packit.yaml
index 4915f0f9..3dfb0600 100644
--- a/.packit.yaml
+++ b/.packit.yaml
@@ -61,6 +61,8 @@ jobs:
     packages: [ramalama-fedora]
     dist_git_branches: &fedora_targets
       - fedora-all
+      - epel10
+      - epel9

 - job: koji_build
   trigger: commit
@@ -72,3 +74,5 @@ jobs:
     packages: [ramalama-fedora]
     dist_git_branches:
       - fedora-branched # rawhide updates are created automatically
+      - epel10
+      - epel9
diff --git a/container-images/asahi/Containerfile b/container-images/asahi/Containerfile
index eab5a2ee..3b758d2a 100644
--- a/container-images/asahi/Containerfile
+++ b/container-images/asahi/Containerfile
@@ -1,5 +1,6 @@
 FROM fedora:41

+ENV ASAHI_VISIBLE_DEVICES 1
 COPY ../scripts /scripts
 RUN chmod +x /scripts/*.sh && \
     /scripts/build_llama_and_whisper.sh "asahi"
diff --git a/container-images/intel-gpu/Containerfile b/container-images/intel-gpu/Containerfile
index 69bca4db..a7bde6d6 100644
--- a/container-images/intel-gpu/Containerfile
+++ b/container-images/intel-gpu/Containerfile
@@ -1,32 +1,24 @@
 FROM quay.io/fedora/fedora:41 as builder

 COPY intel-gpu/oneAPI.repo /etc/yum.repos.d/
+COPY scripts/build_llama_and_whisper.sh /

-RUN dnf install -y intel-opencl g++ cmake git tar libcurl-devel intel-oneapi-mkl-sycl-devel intel-oneapi-dnnl-devel intel-oneapi-compiler-dpcpp-cpp ; \
-    git clone https://github.com/ggerganov/llama.cpp.git -b b4523 ; \
-    cd llama.cpp ; \
-    mkdir -p build ; \
-    cd build ; \
-    source /opt/intel/oneapi/setvars.sh ; \
-    cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_CCACHE=OFF -DGGML_NATIVE=OFF ; \
-    cmake --build . --config Release -j -v ; \
-    cmake --install . --prefix /llama-cpp
+RUN chmod +x /build_llama_and_whisper.sh ; \
+    /build_llama_and_whisper.sh intel-gpu

 FROM quay.io/fedora/fedora:41

-ENV LD_LIBRARY_PATH="/usr/local/lib64:/usr/local/lib/:/opt/intel/oneapi/mkl/2025.0/lib:/opt/intel/oneapi/compiler/2025.0/opt/compiler/lib:/opt/intel/oneapi/compiler/2025.0/lib/clang/19/lib:/opt/intel/oneapi/compiler/2025.0/lib:/opt/intel/oneapi/umf/0.9/lib:/opt/intel/oneapi/tbb/2022.0/lib:/opt/intel/oneapi/tcm/1.2/lib:/opt/intel/oneapi/redist/opt/compiler/lib:/opt/intel/oneapi/redist/lib/clang/19/lib:/opt/intel/oneapi/redist/lib:/opt/intel/oneapi/mkl/2025.0/lib:/opt/intel/oneapi/compiler/2025.0/opt/compiler/lib:/opt/intel/oneapi/compiler/2025.0/lib/clang/19/lib:/opt/intel/oneapi/compiler/2025.0/lib:/opt/intel/oneapi/umf/0.9/lib:/opt/intel/oneapi/tbb/2022.0/lib:/opt/intel/oneapi/tcm/1.2/lib:/opt/intel/oneapi/redist/opt/compiler/lib:/opt/intel/oneapi/redist/lib/clang/19/lib:/opt/intel/oneapi/redist/lib"
-
-COPY --from=builder /llama-cpp/bin/ /usr/local/bin/
-COPY --from=builder /llama-cpp/lib/ /usr/local/lib/
-COPY --from=builder /llama-cpp/lib64/ /usr/local/lib64/
-COPY --from=builder /llama-cpp/include/ /usr/local/include/
+COPY --from=builder /tmp/install/ /usr/
 COPY intel-gpu/oneAPI.repo /etc/yum.repos.d/
+COPY --chown=0:0 intel-gpu/entrypoint.sh /

-RUN dnf install -y intel-opencl libcurl lspci clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl ; \
+RUN dnf install -y procps-ng python3 python3-pip python3-devel intel-level-zero oneapi-level-zero intel-compute-runtime libcurl lspci clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl ; \
     chown 0:0 /etc/passwd ; \
     chown 0:0 /etc/group ; \
-    chmod g=u /etc/passwd /etc/group ; \
-    useradd -u 1000 -g render -G video -s /bin/bash -d /home/llama-user llama-user
+    chmod g=u /etc/passwd /etc/group /home ; \
+    chmod +x /entrypoint.sh
+
+USER 10000

-USER 1000
-WORKDIR /home/llama-user
+ENTRYPOINT ["/entrypoint.sh"]
+CMD [ "tail", "-f", "/dev/null" ]
\ No newline at end of file
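The intel-gpu image now builds llama.cpp through the shared container-images/scripts/build_llama_and_whisper.sh script: the builder stage installs into /tmp/install and the runtime stage copies that tree straight into /usr. A rough local build-and-smoke-test sketch follows; the tag name is arbitrary, and the container-images/ build context is only inferred from the COPY paths above:

    # Build the image; intel-gpu/oneAPI.repo and scripts/build_llama_and_whisper.sh
    # are copied relative to the context, so container-images/ is passed as context.
    podman build --no-cache -t ramalama-intel-gpu \
        -f container-images/intel-gpu/Containerfile container-images/

    # Smoke test: with the host GPU device passed through, clinfo (installed in the
    # runtime stage) should list the Intel device via the Level Zero/OpenCL runtime.
    podman run --rm --device /dev/dri ramalama-intel-gpu clinfo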
diff --git a/container-images/intel-gpu/entrypoint.sh b/container-images/intel-gpu/entrypoint.sh
new file mode 100644
index 00000000..ee472b2f
--- /dev/null
+++ b/container-images/intel-gpu/entrypoint.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+if [ -z ${HOME} ]
+then
+    export HOME=/home/llama-user
+fi
+
+# Create Home directory
+if [ ! -d "${HOME}" ]
+then
+    mkdir -p "${HOME}"
+fi
+
+# Create User ID
+if ! whoami &> /dev/null
+then
+    if [ -w /etc/passwd ] && [ -w /etc/group ]
+    then
+        echo "${USER_NAME:-llama-user}:x:$(id -u):0:${USER_NAME:-llama-user} user:${HOME}:/bin/bash" >> /etc/passwd
+        echo "${USER_NAME:-llama-user}:x:$(id -u):" >> /etc/group
+        render_group="$(cat /etc/group | grep 'render:x')"
+        video_group="$(cat /etc/group | grep 'video:x')"
+        render_group_new="${render_group}${USER_NAME:-llama-user}"
+        video_group_new="${video_group}${USER_NAME:-llama-user}"
+        sed "s|${render_group}|${render_group_new}|g" /etc/group > /tmp/group
+        cat /tmp/group > /etc/group
+        sed "s|${video_group}|${video_group_new}|g" /etc/group > /tmp/group
+        cat /tmp/group > /etc/group
+    fi
+fi
+
+# Configure Z shell
+if [ ! -f ${HOME}/.zshrc ]
+then
+    (echo "source /opt/intel/oneapi/setvars.sh") > ${HOME}/.zshrc
+fi
+
+# Configure Bash shell
+if [ ! -f ${HOME}/.bashrc ]
+then
+    (echo "source /opt/intel/oneapi/setvars.sh") > ${HOME}/.bashrc
+fi
+
+source /opt/intel/oneapi/setvars.sh
+
+exec "$@"
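The entrypoint exists so the runtime image can start under an arbitrary, non-pre-created UID (USER 10000 in the Containerfile above): /etc/passwd and /etc/group are group-writable by GID 0, so the script can register the current UID as llama-user (or ${USER_NAME}), append that name to the render and video groups, and source the oneAPI environment before handing off to the requested command. A hedged way to exercise that path, reusing the image tag assumed in the earlier sketch:

    # Run under a UID that does not exist in the image's /etc/passwd; GID 0 keeps
    # /etc/passwd and /etc/group writable so the entrypoint can add the entry itself.
    podman run --rm --user 12345:0 ramalama-intel-gpu whoami
    # Expected output: llama-user (or the value of USER_NAME, if that is set)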
diff --git a/container-images/scripts/build_llama_and_whisper.sh b/container-images/scripts/build_llama_and_whisper.sh
index d0fccdf3..25c81f9d 100755
--- a/container-images/scripts/build_llama_and_whisper.sh
+++ b/container-images/scripts/build_llama_and_whisper.sh
@@ -7,6 +7,9 @@ dnf_install() {
   local vulkan_rpms=("vulkan-headers" "vulkan-loader-devel" "vulkan-tools" \
                      "spirv-tools" "glslc" "glslang")
   local blas_rpms=("openblas-devel")
+  local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel" \
+                    "intel-oneapi-compiler-dpcpp-cpp" "intel-level-zero" \
+                    "oneapi-level-zero" "oneapi-level-zero-devel" "intel-compute-runtime")

   # All the UBI-based ones
   if [ "$containerfile" = "ramalama" ] || [ "$containerfile" = "rocm" ] || \
@@ -43,6 +46,11 @@ dnf_install() {
     # shellcheck disable=SC1091
     . /opt/rh/gcc-toolset-12/enable
   fi
+
+  if [ "$containerfile" = "intel-gpu" ]; then
+    dnf install -y "${rpm_list[@]}" "${intel_rpms[@]}"
+    source /opt/intel/oneapi/setvars.sh
+  fi
 }

 cmake_check_warnings() {
@@ -57,7 +65,7 @@ cmake_steps() {
 }

 set_install_prefix() {
-  if [ "$containerfile" = "cuda" ]; then
+  if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ]; then
     install_prefix="/tmp/install"
   else
     install_prefix="/usr"
@@ -76,6 +84,9 @@ configure_common_flags() {
     vulkan | asahi)
       common_flags+=("-DGGML_VULKAN=1")
       ;;
+    intel-gpu)
+      common_flags+=("-DGGML_SYCL=ON" "-DCMAKE_C_COMPILER=icx" "-DCMAKE_CXX_COMPILER=icpx")
+      ;;
   esac
 }
diff --git a/docs/ramalama-bench.1.md b/docs/ramalama-bench.1.md
index c94435f3..87d51acf 100644
--- a/docs/ramalama-bench.1.md
+++ b/docs/ramalama-bench.1.md
@@ -28,9 +28,6 @@ URL support means if a model is on a web site or even on your local system, you
 #### **--help**, **-h**
 show this help message and exit

-#### **--network-mode**=*none*
-set the network mode for the container
-
 ## DESCRIPTION
 Benchmark specified AI Model.

diff --git a/docs/ramalama-convert.1.md b/docs/ramalama-convert.1.md
index bf09ec05..19fad843 100644
--- a/docs/ramalama-convert.1.md
+++ b/docs/ramalama-convert.1.md
@@ -16,9 +16,6 @@ The model can be from RamaLama model storage in Huggingface, Ollama, or local mo
 #### **--help**, **-h**
 Print usage message

-#### **--network-mode**=*none*
-sets the configuration for network namespaces when handling RUN instructions
-
 #### **--type**=*raw* | *car*
 type of OCI Model Image to convert.

diff --git a/docs/ramalama-run.1.md b/docs/ramalama-run.1.md
index 6fdc205b..e0a69bd6 100644
--- a/docs/ramalama-run.1.md
+++ b/docs/ramalama-run.1.md
@@ -37,9 +37,6 @@ show this help message and exit
 #### **--name**, **-n**
 name of the container to run the Model in

-#### **--network-mode**=*none*
-set the network mode for the container
-
 #### **--seed**=
 Specify seed rather than using random seed model interaction

diff --git a/docs/ramalama-serve.1.md b/docs/ramalama-serve.1.md
index fede1be3..49840ef7 100644
--- a/docs/ramalama-serve.1.md
+++ b/docs/ramalama-serve.1.md
@@ -64,9 +64,6 @@ IP address for llama.cpp to listen on.
 #### **--name**, **-n**
 Name of the container to run the Model in.

-#### **--network-mode**=*default*
-set the network mode for the container
-
 #### **--port**, **-p**
 port for AI Model server to listen on

diff --git a/ramalama/cli.py b/ramalama/cli.py
index 8fd2aefb..ead7cd65 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -449,12 +449,6 @@ def bench_cli(args):

 def bench_parser(subparsers):
     parser = subparsers.add_parser("bench", aliases=["benchmark"], help="benchmark specified AI Model")
-    parser.add_argument(
-        "--network-mode",
-        type=str,
-        default="none",
-        help="set the network mode for the container",
-    )
     parser.add_argument("MODEL")  # positional argument
     parser.set_defaults(func=bench_cli)

@@ -678,13 +672,6 @@ def convert_parser(subparsers):
 Model "car" includes base image with the model stored in a /models subdir.
 Model "raw" contains the model and a link file model.file to it stored at /.""",
     )
-    # https://docs.podman.io/en/latest/markdown/podman-build.1.html#network-mode-net
-    parser.add_argument(
-        "--network-mode",
-        type=str,
-        default="none",
-        help="sets the configuration for network namespaces when handling RUN instructions",
-    )
     parser.add_argument("SOURCE")  # positional argument
     parser.add_argument("TARGET")  # positional argument
     parser.set_defaults(func=convert_cli)
@@ -804,15 +791,6 @@ def _run(parser):
 def run_parser(subparsers):
     parser = subparsers.add_parser("run", help="run specified AI Model as a chatbot")
     _run(parser)
-    # Disable network access by default, and give the option to pass any supported network mode into
-    # podman if needed:
-    # https://docs.podman.io/en/latest/markdown/podman-run.1.html#network-mode-net
-    parser.add_argument(
-        "--network-mode",
-        type=str,
-        default="none",
-        help="set the network mode for the container",
-    )
     parser.add_argument("MODEL")  # positional argument
     parser.add_argument(
         "ARGS", nargs="*", help="Overrides the default prompt, and the output is returned without entering the chatbot"
@@ -838,17 +816,6 @@ def serve_parser(subparsers):
     parser.add_argument(
         "-p", "--port", default=config.get('port', "8080"), help="port for AI Model server to listen on"
     )
-    # --network-mode=default lets the container listen on localhost, and is an option that's compatible
-    # with podman and docker. It should use the bridge driver for rootful podman, the pasta driver for
-    # rootless podman, and the bridge driver for docker:
-    # https://docs.podman.io/en/latest/markdown/podman-run.1.html#network-mode-net
-    # https://docs.docker.com/engine/network/#drivers
-    parser.add_argument(
-        "--network-mode",
-        type=str,
-        default="default",
-        help="set the network mode for the container",
-    )
     parser.add_argument("MODEL")  # positional argument
     parser.set_defaults(func=serve_cli)
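With --network-mode gone from bench, convert, run and serve, containers and OCI builds simply inherit the engine's default network configuration (bridge for rootful podman and docker, pasta or slirp4netns for rootless podman), so served ports are still published. A hedged usage sketch; the model name is only a placeholder:

    # Serve a model in one terminal; "tinyllama" stands in for any model reference
    # already available locally.
    ramalama serve --port 8080 tinyllama

    # In another terminal, confirm the published port answers on localhost.
    # /v1/models is the llama.cpp server's OpenAI-compatible model listing, assuming
    # that backend is the one running.
    curl http://127.0.0.1:8080/v1/models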
diff --git a/ramalama/common.py b/ramalama/common.py
index 50c1db2f..b5d63ca7 100644
--- a/ramalama/common.py
+++ b/ramalama/common.py
@@ -244,12 +244,16 @@ def get_gpu():
         return

     # ASAHI CASE
-    if os.path.exists('/etc/os-release'):
-        with open('/etc/os-release', 'r') as file:
-            if "asahi" in file.read().lower():
-                # Set Env Var and break
-                os.environ["ASAHI_VISIBLE_DEVICES"] = "1"
-                return
+    if os.path.exists('/proc/device-tree/compatible'):
+        try:
+            with open('/proc/device-tree/compatible', 'rb') as f:
+                content = f.read().split(b"\0")
+                # Check if "apple,arm-platform" is in the content
+                if b"apple,arm-platform" in content:
+                    os.environ["ASAHI_VISIBLE_DEVICES"] = "1"
+        except OSError:
+            # Handle the case where the file does not exist
+            pass

     # NVIDIA CASE
     try:
diff --git a/ramalama/model.py b/ramalama/model.py
index bc0cb089..24c27e52 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -153,7 +153,6 @@ def setup_container(self, args):
             "-i",
             "--label",
             "RAMALAMA",
-            f"--network={args.network_mode}",
             "--security-opt=label=disable",
             "--name",
             name,
diff --git a/ramalama/oci.py b/ramalama/oci.py
index 096b1062..6f5e50ff 100644
--- a/ramalama/oci.py
+++ b/ramalama/oci.py
@@ -174,19 +174,7 @@ def build(self, source, target, args):
         else:
             c.write(model_raw)
         imageid = (
-            run_cmd(
-                [
-                    self.conman,
-                    "build",
-                    "--no-cache",
-                    f"--network={args.network_mode}",
-                    "-q",
-                    "-f",
-                    containerfile.name,
-                    contextdir,
-                ],
-                debug=args.debug,
-            )
+            run_cmd([self.conman, "build", "--no-cache", "-q", "-f", containerfile.name, contextdir], debug=args.debug)
             .stdout.decode("utf-8")
             .strip()
         )
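The Asahi detection in get_gpu() now keys off the device tree instead of /etc/os-release, so Apple Silicon is recognized even inside containers or on distributions whose os-release never mentions Asahi, which pairs with the ASAHI_VISIBLE_DEVICES default baked into the asahi Containerfile above. A rough shell equivalent of the same probe, useful for checking a host by hand (grep -a because the compatible node is a NUL-separated binary blob):

    # Roughly what get_gpu() does now: look for the Apple platform marker in the
    # device tree and, if found, expose the GPU to the Asahi image via the env var.
    if grep -qa 'apple,arm-platform' /proc/device-tree/compatible 2>/dev/null; then
        export ASAHI_VISIBLE_DEVICES=1
    fi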