Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 16 additions & 18 deletions Dockerfile.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@ RUN uv pip install \
"nvidia-nvjitlink-cu12==12.5.82"
RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2"

# b/385145217 Latest Colab lacks mkl numpy, install it.
RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel/simple numpy

# newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason
RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2"
# b/315753846: Unpin translate package.
RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" "google-cloud-translate==3.12.1"

# b/385145217 Latest Colab lacks mkl numpy, install it.
RUN uv pip install --system --force-reinstall -i https://software.repos.intel.com/python/pypi "numpy==1.26.4"

# b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune.
# b/415358158: Gensim removed from Colab image to upgrade scipy
Expand All @@ -58,8 +59,8 @@ ADD clean-layer.sh /tmp/clean-layer.sh
ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
ADD patches/template_conf.json /opt/kaggle/conf.json

# /opt/conda/lib/python3.11/site-packages
ARG PACKAGE_PATH=/usr/local/lib/python3.11/dist-packages
# /opt/conda/lib/python3.12/site-packages
ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages

# Install GPU-specific non-pip packages.
{{ if eq .Accelerator "gpu" }}
Expand All @@ -86,10 +87,14 @@ ADD patches/keras_internal.py \
RUN apt-get install -y libfreetype6-dev && \
apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing

# NLTK Project datasets
# b/408298750: We currently reinstall the package, because we get the following error:
# `AttributeError: module 'inspect' has no attribute 'formatargspec'. Did you mean: 'formatargvalues'?`
RUN uv pip install --system --force-reinstall "nltk>=3.9.1"
RUN apt-get install -y git-lfs && \
# vtk dependencies
apt-get install -y libgl1-mesa-glx && \
# xvfbwrapper dependencies
apt-get install -y xvfb && \
/tmp/clean-layer.sh

RUN uv pip install --system --force-reinstall "nltk==3.9.1"
RUN mkdir -p /usr/share/nltk_data && \
# NLTK Downloader no longer continues smoothly after an error, so we explicitly list
# the corpuses that work
Expand All @@ -108,13 +113,6 @@ RUN mkdir -p /usr/share/nltk_data && \
twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe

RUN apt-get install -y git-lfs && \
# vtk dependencies
apt-get install -y libgl1-mesa-glx && \
# xvfbwrapper dependencies
apt-get install -y xvfb && \
/tmp/clean-layer.sh

# Download base easyocr models.
# https://github.com/JaidedAI/EasyOCR#usage
RUN mkdir -p /root/.EasyOCR/model && \
Expand Down Expand Up @@ -180,7 +178,7 @@ ADD patches/kaggle_gcp.py \

# Figure out why this is in a different place?
# Found by doing a export PYTHONVERBOSE=1 and then running python and checking for where it looked for it.
ADD patches/sitecustomize.py /usr/lib/python3.11/sitecustomize.py
ADD patches/sitecustomize.py /usr/lib/python3.12/sitecustomize.py

ARG GIT_COMMIT=unknown \
BUILD_DATE=unknown
Expand Down
2 changes: 1 addition & 1 deletion config.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime
BASE_IMAGE_TAG=release-colab_20250626-060053_RC00
BASE_IMAGE_TAG=release-colab_20250916-060051_RC00
CUDA_MAJOR_VERSION=12
CUDA_MINOR_VERSION=5
16 changes: 5 additions & 11 deletions kaggle_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ PyArabic
PyUpSet
Pympler
Rtree
shapely<2
SimpleITK
# b/302136621: Fix eli5 import for learntools, newer version require scikit-learn > 1.3
TPOT==0.12.1
Theano
Wand
annoy
arrow
Expand Down Expand Up @@ -49,11 +47,10 @@ geojson
geopandas==v0.14.4
gensim
google-cloud-aiplatform
# b/315753846: Unpin translate package.
google-cloud-translate==3.12.1
google-cloud-videointelligence
google-cloud-vision
google-genai
google-adk
gpxpy
h2o
haversine
Expand All @@ -74,7 +71,7 @@ jupyterlab-lsp
kaggle>=1.7.4.2
kaggle-environments
keras-cv
keras-nlp
keras-hub
keras-tuner
kornia
langid
Expand Down Expand Up @@ -109,12 +106,10 @@ preprocessing
pudb
pyLDAvis
pycryptodome
pydegensac
pydicom
pydub
pyemd
pyexcel-ods
pymc3
pymongo
pypdf
pytesseract
Expand All @@ -140,16 +135,15 @@ git+https://github.com/facebookresearch/segment-anything.git
# b/329869023: shap 0.45.0 breaks learntools
shap==0.44.1
squarify
tensorflow-cloud
tensorflow-io
tensorflow-text
tensorflow_decision_forests
timm
torchao
torchinfo
torchmetrics
torchtune
transformers>=4.51.0
tensorflow-cloud
tensorflow-io
tensorflow-text
triton
tsfresh
vtk
Expand Down
4 changes: 2 additions & 2 deletions tests/test_keras_nlp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest

import keras_nlp
import keras_hub
import keras
import numpy as np

Expand All @@ -9,7 +9,7 @@
class TestKerasNLP(unittest.TestCase):
def test_fit(self):
with create_test_kagglehub_server():
classifier = keras_nlp.models.BertClassifier.from_preset(
classifier = keras_hub.models.BertClassifier.from_preset(
'bert_tiny_en_uncased',
load_weights=False, # load randomly initialized model from preset architecture with weights
num_classes=2,
Expand Down
15 changes: 5 additions & 10 deletions tests/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,8 @@ def test_array(self):
# Numpy must be linked to the MKL. (Occasionally, a third-party package will muck up the installation
# and numpy will be reinstalled with an OpenBLAS backing.)
def test_mkl(self):
try:
from numpy.distutils.system_info import get_info
# This will throw an exception if the MKL is not linked correctly or return an empty dict.
self.assertTrue(get_info("blas_mkl"))
except:
# Fallback to check if mkl is present via show_config()
config_out = io.StringIO()
with redirect_stdout(config_out):
np.show_config()
self.assertIn("mkl_rt", config_out.getvalue())
# Fallback to check if mkl is present via show_config()
config_out = io.StringIO()
with redirect_stdout(config_out):
np.show_config()
self.assertIn("mkl-dynamic", config_out.getvalue())
26 changes: 13 additions & 13 deletions tests/test_tensorflow_decision_forests.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import unittest
# import unittest

import numpy as np
import pandas as pd
import tensorflow_decision_forests as tfdf
# import numpy as np
# import pandas as pd
# import tensorflow_decision_forests as tfdf

class TestTensorflowDecisionForest(unittest.TestCase):
def test_fit(self):
train_df = pd.read_csv("/input/tests/data/train.csv")
# class TestTensorflowDecisionForest(unittest.TestCase):
# def test_fit(self):
# train_df = pd.read_csv("/input/tests/data/train.csv")

# Convert the dataset into a TensorFlow dataset.
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="label")
# # Convert the dataset into a TensorFlow dataset.
# train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="label")

# Train the model
model = tfdf.keras.RandomForestModel(num_trees=1)
model.fit(train_ds)
# # Train the model
# model = tfdf.keras.RandomForestModel(num_trees=1)
# model.fit(train_ds)

self.assertEqual(1, model.count_params())
# self.assertEqual(1, model.count_params())