diff --git a/.github/workflows/colab.yml b/.github/workflows/colab.yml
new file mode 100644
index 0000000000..d7a3eab3af
--- /dev/null
+++ b/.github/workflows/colab.yml
@@ -0,0 +1,74 @@
+name: Testing colab build
+
+# Triggered by pushes to the branches listed below; no pull_request trigger is declared.
+on:
+  push:
+    branches:
+      - main
+      - master
+      - box
+
+jobs:
+  enonces:
+    name: Render notebooks
+    runs-on: ubuntu-latest
+    container: linogaliana/python-datascientist:latest
+    # NOTE(review): only `push` triggers this workflow, where `github.event.pull_request`
+    # is presumably not populated. This guard and the checkout `ref` below look like
+    # leftovers from a pull_request workflow - confirm before relying on them.
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.ref }}
+      - name: Configure safe.directory  # Workaround for actions/checkout#760
+        run: |
+          git config --global --add safe.directory /__w/python-datascientist/python-datascientist
+          git config --global --add safe.directory /__w/python-datascientist/python-datascientist-notebooks
+      # Prints the workspace content and the conda environment to the job log
+      - shell: bash
+        run: |
+          ls
+          conda info
+          conda list
+      # Swaps in the reduced test project, rewrites callouts, then renders to ipynb.
+      # NOTE(review): content/modelisation/index.qmd is removed here but is still listed
+      # under `render` in _quarto-test.yml - confirm this is intended.
+      - name: Convert in ipynb with Quarto
+        env:
+          API_INPI_USERNAME: ${{ secrets.API_INPI_USERNAME }}
+          API_INPI_PASSWORD: ${{ secrets.API_INPI_PASSWORD }}
+        run: |
+          export QUARTO_PROFILE=fr,en
+          rm _quarto.yml
+          cp _quarto-test.yml _quarto.yml
+          rm content/modelisation/index.qmd # Remove file not building in ipynb
+          python build/colab/tweak_quarto_project.py
+          quarto render --to ipynb
+          #quarto render --profile fr --to ipynb
+          #quarto render --profile en --to ipynb
+      - name: Move to expected directory
+        env:
+          API_INPI_USERNAME: ${{ secrets.API_INPI_USERNAME }}
+          API_INPI_PASSWORD: ${{ secrets.API_INPI_PASSWORD }}
+        run: |
+          mkdir -p temp_notebooks
+          mkdir -p temp_notebooks/notebooks
+          python build/move_files.py --direction temp_notebooks/notebooks
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Source enonce
+          path: content/
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Enonces
+          path: temp_notebooks/notebooks/
+      # Publishes temp_notebooks/ to the python-datascientist-notebooks-colab repository
+      - name: Pushes to another repository
+        uses: linogaliana/github-action-push-to-another-repository@main
+        env:
+          API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
+        with:
+          source-directory: 'temp_notebooks/'
+          destination-repository-username: 'linogaliana'
+          destination-repository-name: 'python-datascientist-notebooks-colab'
+          user-email: lino.galiana@insee.fr
+          destination-github-username: linogaliana
+          #target-branch: test
+          create-target-branch-if-needed: true
+          reset-repo: true
+
+
+
diff --git a/.github/workflows/prod.yml b/.github/workflows/prod.yml
index bef6556211..85ca1e9bc5 100644
--- a/.github/workflows/prod.yml
+++ b/.github/workflows/prod.yml
@@ -197,41 +197,4 @@ jobs:
#target-branch: test
create-target-branch-if-needed: true
reset-repo: true
- define-matrix:
- runs-on: ubuntu-latest
- needs: enonces
- outputs:
- matrix: ${{ steps.set-matrix.outputs.matrix }}
- steps:
- - uses: actions/checkout@v4
- with:
- repository: 'linogaliana/python-datascientist-notebooks'
- - name: Define matrix
- id: set-matrix
- run: |
- echo "::set-output name=matrix::$(find . -type f -name "*.ipynb" \
- ! -name "_*" \
- ! -regex '.*/getting-started/.*' \
- ! -regex '.*/modelisation/index.*' \
- ! -regex '.*/git/.*' \
- ! -regex '.*/modern-ds/.*' \
- ! -regex '.*/manipulation/04a_webscraping_TP.*' \
- | jq -R -s -c 'split("\n")[:-1]')"
-
- check:
- needs: define-matrix
- runs-on: ubuntu-latest
- container: linogaliana/python-datascientist:latest
- continue-on-error: true
- strategy:
- matrix:
- manifest: ${{ fromJson(needs.define-matrix.outputs.matrix) }}
- steps:
- - uses: actions/checkout@v4
- with:
- repository: 'linogaliana/python-datascientist-notebooks'
- - run: |
- quarto render ${{ matrix.manifest }} --execute
-
-
diff --git a/_quarto-test.yml b/_quarto-test.yml
new file mode 100644
index 0000000000..170853733e
--- /dev/null
+++ b/_quarto-test.yml
@@ -0,0 +1,98 @@
+project:
+  type: website
+  # Files rendered by this test project; each entry appears once
+  # (evaluation.qmd and corrections.qmd were previously listed twice).
+  render:
+    - index.qmd
+    - 404.qmd
+    - content/getting-started/index.qmd
+    - content/manipulation/index.qmd
+    - content/visualisation/index.qmd
+    - content/getting-started/01_environment.qmd
+    - content/modelisation/index.qmd
+    - content/NLP/index.qmd
+    - content/annexes/corrections.qmd
+    - content/annexes/evaluation.qmd
+    - content/git/*.qmd
+    - content/annexes/about.qmd
+
+# NOTE(review): nested keys below appear with flattened indentation in this patch;
+# verify the nesting against the real file before applying.
+# Profiles: "fr" is the default; "fr" and "en" are declared together under `group`.
+profile:
+ default: fr
+ group: [fr, en]
+
+# Execution settings: cache is switched on.
+execute:
+ cache: true
+
+# WEBSITE ARCHITECTURE ---------------------
+
+website:
+ page-navigation: true
+ back-to-top-navigation: true
+ reader-mode: true
+ navbar:
+ background: "white"
+ search: true
+ title: false
+ left:
+ - file: index.qmd
+ text: Home
+ - sidebar:introduction
+ - sidebar:manipulation
+ - sidebar:communication
+ - sidebar:modelisation
+ - sidebar:NLP
+ - sidebar:modern
+ - sidebar:git
+ - sidebar:appendix
+ tools:
+ - icon: github
+ href: https://github.com/linogaliana/python-datascientist
+ comments:
+ giscus:
+ repo: linogaliana/python-datascientist
+ twitter-card: true
+ site-url: https://pythonds.linogaliana.fr
+ repo-url: https://github.com/linogaliana/python-datascientist
+ repo-branch: main
+ issue-url: https://github.com/linogaliana/python-datascientist/issues/new
+ repo-actions: [edit, issue]
+
+
+# Output formats: a themed HTML site plus ipynb with default options.
+format:
+ html:
+ theme:
+ light: [lightly, styles/custom.scss, styles/custom-light.scss]
+ dark: [darkly, styles/custom.scss, styles/custom-dark.scss]
+ css: styles/styles.css
+ toc: true
+ code-overflow: wrap
+ include-in-header:
+ - build/toggle.js
+ ipynb: default
+
+
+# PAGE OPTIONS ---------------------
+
+# Filters in use; black-formatter is currently commented out.
+filters:
+ - build/replace-title.lua
+ - build/lang-notebook.lua
+ #- black-formatter
+ - include-code-files
+
+crossref:
+ chapters: true
+
+# Remaining top-level options.
+author: Lino Galiana
+date: today
+date-format: iso
+page-layout: article
+title-block-banner: "#e9f3fa"
+number-sections: true
+wrap: preserve
+format-links: false
+validate-yaml: false
+keep-ipynb: true
+lightbox: auto
+google-scholar: true
+commentable: true
+
diff --git a/build/colab/callout_colab.py b/build/colab/callout_colab.py
new file mode 100644
index 0000000000..a2013fe2d1
--- /dev/null
+++ b/build/colab/callout_colab.py
@@ -0,0 +1,143 @@
+import os
+import re
+import markdown
+from loguru import logger
+
+
+def create_python_snippet(title, content, callout_type):
+    """
+    Build a Python code cell that displays a styled HTML callout box.
+
+    The stylesheet is read from ``./build/colab/colab.css`` (relative to the
+    current working directory) and the callout body is converted from
+    Markdown to HTML with ``markdown.markdown``.
+
+    Args:
+        title (str): The title of the callout box, shown in its header.
+        content (str): The main content of the callout box (Markdown).
+        callout_type (str): The type of callout (e.g., 'note', 'caution', 'warning').
+            Selects the ``callout-header-<type>`` CSS class.
+
+    Returns:
+        str: A fenced ``{python}`` cell that renders the callout through
+        ``IPython.display.HTML`` when executed.
+
+    Raises:
+        FileNotFoundError: If the stylesheet does not exist.
+    """
+
+    css_file_path = "./build/colab/colab.css"
+    if not os.path.exists(css_file_path):
+        raise FileNotFoundError(f"{css_file_path} not found. Please ensure the file exists.")
+
+    with open(css_file_path, "r", encoding="utf-8") as css_file:
+        css_rules = css_file.read()
+
+    # Wrap the rules in a <style> element; previously the CSS that had just been
+    # read was overwritten by a whitespace-only string and never reached the output.
+    style = f"""
+<style>
+{css_rules}
+</style>
+"""
+
+    # Class names follow the selectors defined in colab.css
+    content_html = f"""
+<div class="callout">
+    <div class="callout-header-{callout_type}">{title}</div>
+    <div class="callout-body">
+        {markdown.markdown(content)}
+    </div>
+</div>
+"""
+
+    # NOTE(review): both values are embedded in ''' literals of the generated cell;
+    # a callout containing ''' or backslash sequences would alter or break that
+    # literal - confirm no source file relies on such content.
+    full_html = (
+        "\n"
+        "```{python}\n"
+        "from IPython.display import HTML\n"
+        f"style = '''\n{style}\n'''\n"
+        f"content_html = '''\n{content_html}\n'''\n"
+        'HTML(f"{style}\\n{content_html}")\n'
+        "\n```"
+        "\n"
+    )
+    return full_html
+
+
+def substitute_snippets(content, regex):
+    """
+    Replace every block matched by ``regex`` with a generated callout snippet.
+
+    Args:
+        content (str): Original text content.
+        regex (re.Pattern): Compiled pattern; its first group captures the block body.
+
+    Returns:
+        str: ``content`` where each match is replaced by the result of
+        ``create_python_snippet``.
+    """
+
+    def replacement(match):
+        # The first ".word" found in the matched text names the callout type
+        kind_found = re.search(r"\.(\w+)", match.group(0))
+        callout_type = "note" if kind_found is None else kind_found.group(1)
+
+        body = match.group(1).strip()
+
+        # A line starting with '##' provides the title and is dropped from the body;
+        # without one, the capitalised callout type is used instead.
+        heading = re.search(r"^##\s*(.*)", body, re.MULTILINE)
+        if heading is None:
+            title = callout_type.capitalize()
+        else:
+            title = heading.group(1).strip()
+            body = re.sub(r"^##\s*.*", "", body, count=1, flags=re.MULTILINE).strip()
+
+        return create_python_snippet(
+            title=title, content=body, callout_type=callout_type
+        )
+
+    return regex.sub(replacement, content)
+
+
+def process_file(input_file_path, regex_pattern, output_file_path=None):
+    """
+    Reads a file, performs snippet substitutions, and writes the updated content to a file.
+
+    Args:
+        input_file_path (str): Path to the input file.
+        regex_pattern (str): Regex pattern to identify content blocks. It is compiled
+            with ``re.MULTILINE``.
+        output_file_path (str, optional): Path of the file to write. When omitted,
+            ".qmd" in ``input_file_path`` is replaced by "_modified.qmd".
+
+    Returns:
+        None: Also returned early, after logging an error, when the input file
+        does not exist.
+    """
+
+    if output_file_path is None:
+        output_file_path = input_file_path.replace(".qmd", "_modified.qmd")
+
+    # Check if the input file exists
+    if not os.path.exists(input_file_path):
+        logger.error(f"Input file does not exist: {input_file_path}")
+        return None
+
+    # Read the content of the input file. The encoding is explicit so accented
+    # text does not depend on the locale of the machine running the build.
+    logger.info(f"Reading content from {input_file_path}")
+    with open(input_file_path, "r", encoding="utf-8") as file:
+        original_content = file.read()
+
+    # Compile the regex pattern
+    filtered_div_regex = re.compile(regex_pattern, re.MULTILINE)
+
+    # Perform the substitution
+    logger.info("Performing substitution of snippets.")
+    updated_content_with_snippets = substitute_snippets(
+        original_content, filtered_div_regex
+    )
+
+    # Write the modified content to the output file, with the same explicit encoding
+    logger.info(f"Writing updated content to {output_file_path}")
+    with open(output_file_path, "w", encoding="utf-8") as file:
+        file.write(updated_content_with_snippets)
+
+    logger.success(f"Modified content written to {output_file_path}")
+
+
+# Example usage
+if __name__ == "__main__":
+    # `:{3,}` consumes the whole fence, so blocks opened with four colons
+    # (`:::: {.tip}`) no longer leave a stray ":" on either side of the snippet.
+    process_file(
+        input_file_path="./content/getting-started/01_environment.qmd",
+        regex_pattern=r":{3,}\s*\{(?:\.note|\.caution|\.warning|\.important|\.tip|\.exercise)\}([\s\S]*?):{3,}",
+    )
diff --git a/build/colab/colab.css b/build/colab/colab.css
new file mode 100644
index 0000000000..fa7f4fc557
--- /dev/null
+++ b/build/colab/colab.css
@@ -0,0 +1,66 @@
+/* Outer box shared by every callout type */
+.callout {
+  border: 2px solid #d1d5db;
+  border-radius: 8px;
+  box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+  margin-bottom: 20px;
+  background-color: #ffffff;
+  padding: 15px;
+}
+
+/* Declarations common to all header variants */
+.callout-header-note,
+.callout-header-tip,
+.callout-header-exercise,
+.callout-header-warning,
+.callout-header-important,
+.callout-header-caution {
+  font-weight: bold;
+  margin-bottom: 10px;
+  /* Dark text: the header backgrounds below are pale, so white text was unreadable */
+  color: #1f2937;
+  padding: 10px;
+  border-radius: 6px 6px 0 0;
+}
+
+/* Per-type header background */
+.callout-header-note {
+  background-color: #eaf3ff;
+}
+
+.callout-header-tip {
+  background-color: #ebf4f0;
+}
+
+.callout-header-exercise {
+  background-color: #fabdeb;
+}
+
+.callout-header-warning {
+  background-color: #fff9e9;
+}
+
+.callout-header-important {
+  background-color: #fcedee;
+}
+
+.callout-header-caution {
+  background-color: #fff3eb;
+}
+
+.callout-body {
+  margin: 10px 0;
+}
\ No newline at end of file
diff --git a/build/colab/tweak_quarto_project.py b/build/colab/tweak_quarto_project.py
new file mode 100644
index 0000000000..4a95925641
--- /dev/null
+++ b/build/colab/tweak_quarto_project.py
@@ -0,0 +1,61 @@
+import os
+import yaml
+from loguru import logger
+from callout_colab import process_file
+
+
+def read_quarto_yaml(file_path):
+    """
+    Load a YAML file with ``yaml.safe_load``.
+
+    Args:
+        file_path (str): Path to the YAML file.
+
+    Returns:
+        The parsed YAML content, or None when the file does not exist or
+        cannot be read or parsed (an error is logged in both cases).
+    """
+    parsed = None
+    if os.path.exists(file_path):
+        try:
+            logger.info(f"Reading YAML file from {file_path}")
+            with open(file_path, "r") as handle:
+                parsed = yaml.safe_load(handle)
+            logger.success(f"Successfully read YAML content from {file_path}")
+        except Exception as e:
+            # Any failure is logged and reported to the caller as None
+            logger.error(f"Error reading YAML file: {e}")
+            parsed = None
+    else:
+        logger.error(f"YAML file does not exist: {file_path}")
+    return parsed
+
+
+def list_render_files(file_path):
+    """
+    Return the ``project.render`` entries of a Quarto project file.
+
+    Args:
+        file_path (str): Path to the `_quarto.yml` file.
+
+    Returns:
+        list: Entries listed under ``project: render:``; an empty list when
+        that section is absent or empty.
+
+    Raises:
+        FileNotFoundError: If ``read_quarto_yaml`` yields no content for the file.
+    """
+    yaml_content = read_quarto_yaml(file_path)
+
+    if not yaml_content:
+        raise FileNotFoundError("No content to process.")
+
+    # `or {}` / `or []` avoid an AttributeError on a missing `project` key and
+    # make sure callers can always iterate over the result.
+    project_section = yaml_content.get("project") or {}
+    files = project_section.get("render") or []
+
+    return files
+
+
+if __name__ == "__main__":
+    import glob  # local import: only this entry point expands render patterns
+
+    # `:{3,}` consumes the whole fence, so blocks opened with four colons
+    # (`:::: {.tip}`) no longer leave a stray ":" on either side of the snippet.
+    callout_regex = r":{3,}\s*\{(?:\.note|\.caution|\.warning|\.important|\.tip|\.exercise)\}([\s\S]*?):{3,}"
+
+    targets = []
+    for entry in list_render_files("_quarto.yml"):
+        # Render entries may be glob patterns (e.g. content/git/*.qmd). When nothing
+        # matches, keep the literal entry so process_file still reports the missing file.
+        targets.extend(sorted(glob.glob(entry, recursive=True)) or [entry])
+
+    # dict.fromkeys drops repeated paths while keeping first-seen order, so a file
+    # listed twice is rewritten only once.
+    for file in dict.fromkeys(targets):
+        process_file(
+            input_file_path=file,
+            regex_pattern=callout_regex,
+            output_file_path=file,
+        )
diff --git a/content/getting-started/01_environment.qmd b/content/getting-started/01_environment.qmd
index d3edf786d3..0576b8903a 100644
--- a/content/getting-started/01_environment.qmd
+++ b/content/getting-started/01_environment.qmd
@@ -9,6 +9,13 @@ description-en: |
---
+```{python}
+#| echo: true
+import pandas as pd
+x = [0]
+```
+
+
::: {.content-visible when-profile="fr"}
:::: {.tip}
## Objet de ce chapitre
diff --git a/content/manipulation/04a_webscraping_TP.qmd b/content/manipulation/04a_webscraping_TP.qmd
index 683d75d99d..c945546fa4 100644
--- a/content/manipulation/04a_webscraping_TP.qmd
+++ b/content/manipulation/04a_webscraping_TP.qmd
@@ -648,7 +648,7 @@ print("Il y a", len(page.findAll("table")), "éléments dans la page qui sont de
:::: {.tip}
`Python` n'est pas le seul langage qui permet de récupérer des éléments issus d'une page web. C'est l'un des objectifs principaux de `Javascript`, qui est accessible par le biais de n'importe quel navigateur web.
-Par exemple, pour faire le parallèle avec `page.find('title')` que nous avons utilisé au niveau de `Python`, vous pouvez ouvrir la page [précédemment mentionnée](`{python} url_ligue_1`) avec votre navigateur. Après avoir ouvert les outils de développement du navigateur (CTRL+MAJ+K sur `Firefox`), vous pouvez taper dans la console `document.querySelector("title")` qui vous permettra d'obtenir le contenu du noeud HTML recherché:
+Par exemple, pour faire le parallèle avec `page.find('title')` que nous avons utilisé au niveau de `Python`, vous pouvez ouvrir la page [précédemment mentionnée](https://fr.wikipedia.org/wiki/Championnat_de_France_de_football_2019-2020) avec votre navigateur. Après avoir ouvert les outils de développement du navigateur (CTRL+MAJ+K sur `Firefox`), vous pouvez taper dans la console `document.querySelector("title")` qui vous permettra d'obtenir le contenu du noeud HTML recherché:

@@ -663,7 +663,7 @@ La compréhension de la structure d'une page et de l'interaction de celle-ci ave
:::: {.tip}
`Python` is not the only language that allows you to retrieve elements from a web page. This is one of the main objectives of `Javascript`, which is accessible through any web browser.
-For example, to draw a parallel with `page.find('title')` that we used in `Python`, you can open the [previously mentioned page](`{python} url_ligue_1`) with your browser. After opening the browser's developer tools (CTRL+SHIFT+K on `Firefox`), you can type `document.querySelector("title")` in the console to get the content of the HTML node you are looking for:
+For example, to draw a parallel with `page.find('title')` that we used in `Python`, you can open the [previously mentioned page](https://fr.wikipedia.org/wiki/Championnat_de_France_de_football_2019-2020) with your browser. After opening the browser's developer tools (CTRL+SHIFT+K on `Firefox`), you can type `document.querySelector("title")` in the console to get the content of the HTML node you are looking for:

diff --git a/content/visualisation/matplotlib.qmd b/content/visualisation/matplotlib.qmd
index 9b8948d32a..bd5002fd39 100644
--- a/content/visualisation/matplotlib.qmd
+++ b/content/visualisation/matplotlib.qmd
@@ -11,6 +11,7 @@ description-en: |
image: https://minio.lab.sspcloud.fr/lgaliana/generative-art/pythonds/drawing.png
echo: false
bibliography: ../../reference.bib
+eval: false
---
{{< badges