# Renders the course material to Jupyter notebooks with Quarto and pushes the
# result to the Colab notebooks repository.
name: Testing colab build

on:
  push:
    branches:
      - main
      - master
      - box

jobs:
  enonces:
    name: Render notebooks
    runs-on: ubuntu-latest
    container: linogaliana/python-datascientist:latest
    # No fork guard and no explicit checkout ref: this workflow only has a
    # `push` trigger, so `github.event.pull_request` is never populated and
    # the default checkout ref (the pushed commit) is what gets built.
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Configure safe.directory  # Workaround for actions/checkout#760
        run: |
          git config --global --add safe.directory /__w/python-datascientist/python-datascientist
          git config --global --add safe.directory /__w/python-datascientist/python-datascientist-notebooks
      - name: Show environment
        shell: bash
        run: |
          ls
          conda info
          conda list
      - name: Convert in ipynb with Quarto
        env:
          API_INPI_USERNAME: ${{ secrets.API_INPI_USERNAME }}
          API_INPI_PASSWORD: ${{ secrets.API_INPI_PASSWORD }}
        run: |
          export QUARTO_PROFILE=fr,en
          # Swap in the reduced test project (cp overwrites the existing file).
          cp _quarto-test.yml _quarto.yml
          rm content/modelisation/index.qmd  # Remove file not building in ipynb
          # Rewrite callout divs as Python/HTML cells before rendering.
          python build/colab/tweak_quarto_project.py
          quarto render --to ipynb
          # quarto render --profile fr --to ipynb
          # quarto render --profile en --to ipynb
      - name: Move to expected directory
        # NOTE(review): these secrets look unnecessary for a file move —
        # confirm build/move_files.py does not read them, then drop the env.
        env:
          API_INPI_USERNAME: ${{ secrets.API_INPI_USERNAME }}
          API_INPI_PASSWORD: ${{ secrets.API_INPI_PASSWORD }}
        run: |
          mkdir -p temp_notebooks/notebooks
          python build/move_files.py --direction temp_notebooks/notebooks
      - uses: actions/upload-artifact@v4
        with:
          name: Source enonce
          path: content/
      - uses: actions/upload-artifact@v4
        with:
          name: Enonces
          path: temp_notebooks/notebooks/
      - name: Pushes to another repository
        # NOTE(review): the action is referenced by branch (`@main`); consider
        # pinning it to a commit SHA since it receives a write token.
        uses: linogaliana/github-action-push-to-another-repository@main
        env:
          API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
        with:
          source-directory: 'temp_notebooks/'
          destination-repository-username: 'linogaliana'
          destination-repository-name: 'python-datascientist-notebooks-colab'
          user-email: lino.galiana@insee.fr
          destination-github-username: linogaliana
          # target-branch: test
          create-target-branch-if-needed: true
          reset-repo: true
# Reduced Quarto project used by the Colab notebook build: only the files
# listed under `project.render` are rendered.
project:
  type: website
  render:
    # Each target is listed once; the glob is quoted so it is always read as a
    # plain string.
    - index.qmd
    - 404.qmd
    - content/getting-started/index.qmd
    - content/manipulation/index.qmd
    - content/visualisation/index.qmd
    - content/getting-started/01_environment.qmd
    - content/modelisation/index.qmd
    - content/NLP/index.qmd
    - content/annexes/corrections.qmd
    - content/annexes/evaluation.qmd
    - "content/git/*.qmd"
    - content/annexes/about.qmd

profile:
  default: fr
  group: [fr, en]

execute:
  cache: true

# WEBSITE ARCHITECTURE ---------------------

website:
  page-navigation: true
  back-to-top-navigation: true
  reader-mode: true
  navbar:
    background: "white"
    search: true
    title: false
    left:
      - file: index.qmd
        text: Home
      - sidebar:introduction
      - sidebar:manipulation
      - sidebar:communication
      - sidebar:modelisation
      - sidebar:NLP
      - sidebar:modern
      - sidebar:git
      - sidebar:appendix
    tools:
      - icon: github
        href: https://github.com/linogaliana/python-datascientist
  comments:
    giscus:
      repo: linogaliana/python-datascientist
  twitter-card: true
  site-url: https://pythonds.linogaliana.fr
  repo-url: https://github.com/linogaliana/python-datascientist
  repo-branch: main
  issue-url: https://github.com/linogaliana/python-datascientist/issues/new
  repo-actions: [edit, issue]

format:
  html:
    theme:
      light: [lightly, styles/custom.scss, styles/custom-light.scss]
      dark: [darkly, styles/custom.scss, styles/custom-dark.scss]
    css: styles/styles.css
    toc: true
    code-overflow: wrap
    include-in-header:
      - build/toggle.js
  ipynb: default

# PAGE OPTIONS ---------------------

filters:
  - build/replace-title.lua
  - build/lang-notebook.lua
  # - black-formatter
  - include-code-files

crossref:
  chapters: true

author: Lino Galiana
date: today
date-format: iso
page-layout: article
title-block-banner: "#e9f3fa"
number-sections: true
wrap: preserve
format-links: false
validate-yaml: false
keep-ipynb: true
lightbox: auto
google-scholar: true
commentable: true
+
+ {title} +
+
+ {markdown.markdown(content)} +
+
# Fenced-div callouts handled by the converter. ``:{3,}`` accepts fences of
# three OR MORE colons: with a fixed ``:::`` a nested ``:::: {.tip}`` block is
# matched from its second colon, leaving one stray ``:`` on each side of the
# replacement. Group 1 is the text between the fences.
DEFAULT_CALLOUT_PATTERN = (
    r":{3,}\s*\{(?:\.note|\.caution|\.warning|\.important|\.tip|\.exercise)\}"
    r"([\s\S]*?):{3,}"
)

# A callout title is a level-2 heading. ``(?!#)`` rejects ``###`` and deeper
# headings, and ``[ \t]*`` (rather than ``\s*``) keeps the match on one line.
_TITLE_RE = re.compile(r"^##(?!#)[ \t]*(.*)", re.MULTILINE)


def substitute_snippets(content, regex, renderer=None):
    """
    Replace every callout block matched by ``regex`` with a rendered snippet.

    Args:
        content (str): Original text content.
        regex (re.Pattern): Compiled pattern; group 0 must span the whole
            block and group 1 the text inside it.
        renderer (callable, optional): Called as
            ``renderer(title=..., content=..., callout_type=...)`` and must
            return the replacement text. Defaults to
            ``create_python_snippet``.

    Returns:
        str: Updated content with substitutions. Text without any match is
        returned unchanged.
    """
    if renderer is None:
        # Looked up at call time so the default is this module's renderer.
        renderer = create_python_snippet

    def replacement(match):
        # The callout class (".tip", ".note", ...) is read from the opening
        # fence line only, so a dotted word in the body is never mistaken for
        # it.
        opening_line = match.group(0).split("\n", 1)[0]
        type_match = re.search(r"\.(\w+)", opening_line)
        callout_type = type_match.group(1) if type_match else "note"

        body = match.group(1).strip()

        # The first level-2 heading, if any, becomes the title and is removed
        # from the body; otherwise the capitalized callout type is used.
        title_match = _TITLE_RE.search(body)
        title = ""
        if title_match:
            title = title_match.group(1).strip()
            body = (body[: title_match.start()] + body[title_match.end():]).strip()
        if not title:
            title = callout_type.capitalize()

        return renderer(title=title, content=body, callout_type=callout_type)

    return regex.sub(replacement, content)


def process_file(input_file_path, regex_pattern, output_file_path=None):
    """
    Read a file, substitute its callout blocks, and write the result.

    Args:
        input_file_path (str): Path to the input file.
        regex_pattern (str): Regex pattern identifying callout blocks; it is
            compiled with ``re.MULTILINE``.
        output_file_path (str, optional): Where to write the result. Defaults
            to the input path with ``_modified`` inserted before the
            extension (``a.qmd`` -> ``a_modified.qmd``). Pass the input path
            itself to rewrite the file in place.

    Returns:
        None. A missing input file is logged as an error and nothing is
        written.
    """
    if output_file_path is None:
        # splitext only touches the final extension, so ".qmd" elsewhere in
        # the path is left alone and non-.qmd inputs never map onto themselves.
        stem, extension = os.path.splitext(input_file_path)
        output_file_path = f"{stem}_modified{extension}"

    # Check if the input file exists
    if not os.path.exists(input_file_path):
        logger.error(f"Input file does not exist: {input_file_path}")
        return None

    # Explicit UTF-8 so reading/writing does not depend on the runner's locale.
    logger.info(f"Reading content from {input_file_path}")
    with open(input_file_path, "r", encoding="utf-8") as file:
        original_content = file.read()

    filtered_div_regex = re.compile(regex_pattern, re.MULTILINE)

    logger.info("Performing substitution of snippets.")
    updated_content_with_snippets = substitute_snippets(
        original_content, filtered_div_regex
    )

    logger.info(f"Writing updated content to {output_file_path}")
    with open(output_file_path, "w", encoding="utf-8") as file:
        file.write(updated_content_with_snippets)

    logger.success(f"Modified content written to {output_file_path}")
    return None


# Example usage
if __name__ == "__main__":
    process_file(
        input_file_path="./content/getting-started/01_environment.qmd",
        regex_pattern=DEFAULT_CALLOUT_PATTERN,
    )
# Fenced-div callouts to convert. ``:{3,}`` accepts fences of three OR MORE
# colons: with a fixed ``:::`` a nested ``:::: {.tip}`` block is matched from
# its second colon, leaving one stray ``:`` on each side of the replacement.
# Group 1 is the text between the fences.
CALLOUT_PATTERN = (
    r":{3,}\s*\{(?:\.note|\.caution|\.warning|\.important|\.tip|\.exercise)\}"
    r"([\s\S]*?):{3,}"
)


def read_quarto_yaml(file_path):
    """
    Reads and parses a YAML file.

    Args:
        file_path (str): Path to the YAML file.

    Returns:
        dict | None: Parsed content of the YAML file, or None when the file
        does not exist or cannot be read/parsed (the problem is logged).
    """
    if not os.path.exists(file_path):
        logger.error(f"YAML file does not exist: {file_path}")
        return None

    try:
        logger.info(f"Reading YAML file from {file_path}")
        # Explicit UTF-8 so parsing does not depend on the runner's locale.
        with open(file_path, "r", encoding="utf-8") as file:
            yaml_content = yaml.safe_load(file)
        logger.success(f"Successfully read YAML content from {file_path}")
        return yaml_content
    except Exception as e:
        # Deliberately best-effort: callers treat None as "nothing to process".
        logger.error(f"Error reading YAML file: {e}")
        return None


def list_render_files(file_path):
    """
    Returns the render targets declared in a Quarto project file.

    Args:
        file_path (str): Path to the `_quarto.yml` file.

    Returns:
        list: The entries of `project.render`, exactly as written in the
        file; an empty list when the key (or `project`) is absent.

    Raises:
        FileNotFoundError: If the file is missing, unreadable or empty.
    """
    yaml_content = read_quarto_yaml(file_path)

    if not yaml_content:
        raise FileNotFoundError("No content to process.")

    # `or` guards against both a missing key and an explicit null value.
    project = yaml_content.get("project") or {}
    return project.get("render") or []


def expand_render_targets(entries):
    """
    Turns render entries into a list of unique paths, in first-seen order.

    Entries containing glob characters (`*`, `?`, `[`) are expanded against
    the filesystem (sorted for determinism); plain entries are kept as-is,
    even if the file is missing, so the caller can report them.

    Args:
        entries (iterable of str): Render entries, e.g. from
            `list_render_files`.

    Returns:
        list of str: De-duplicated paths.
    """
    import glob

    unique = {}
    for entry in entries:
        if any(char in entry for char in "*?["):
            candidates = sorted(glob.glob(entry, recursive=True))
        else:
            candidates = [entry]
        for path in candidates:
            # dict preserves insertion order, giving an ordered set.
            unique.setdefault(path, None)
    return list(unique)


if __name__ == "__main__":

    files = expand_render_targets(list_render_files("_quarto.yml"))

    for file in files:
        # Rewrite each file in place.
        process_file(
            input_file_path=file,
            regex_pattern=CALLOUT_PATTERN,
            output_file_path=file,
        )
Après avoir ouvert les outils de développement du navigateur (CTRL+MAJ+K sur `Firefox`), vous pouvez taper dans la console `document.querySelector("title")` qui vous permettra d'obtenir le contenu du noeud HTML recherché: +Par exemple, pour faire le parallèle avec `page.find('title')` que nous avons utilisé au niveau de `Python`, vous pouvez ouvrir la page [précédemment mentionnée](https://fr.wikipedia.org/wiki/Championnat_de_France_de_football_2019-2020) avec votre navigateur. Après avoir ouvert les outils de développement du navigateur (CTRL+MAJ+K sur `Firefox`), vous pouvez taper dans la console `document.querySelector("title")` qui vous permettra d'obtenir le contenu du noeud HTML recherché: ![](./04_webscraping/console_log.png) @@ -663,7 +663,7 @@ La compréhension de la structure d'une page et de l'interaction de celle-ci ave :::: {.tip} `Python` is not the only language that allows you to retrieve elements from a web page. This is one of the main objectives of `Javascript`, which is accessible through any web browser. -For example, to draw a parallel with `page.find('title')` that we used in `Python`, you can open the [previously mentioned page](`{python} url_ligue_1`) with your browser. After opening the browser's developer tools (CTRL+SHIFT+K on `Firefox`), you can type `document.querySelector("title")` in the console to get the content of the HTML node you are looking for: +For example, to draw a parallel with `page.find('title')` that we used in `Python`, you can open the [previously mentioned page](https://fr.wikipedia.org/wiki/Championnat_de_France_de_football_2019-2020) with your browser. 
After opening the browser's developer tools (CTRL+SHIFT+K on `Firefox`), you can type `document.querySelector("title")` in the console to get the content of the HTML node you are looking for: ![](./04_webscraping/console_log.png) diff --git a/content/visualisation/matplotlib.qmd b/content/visualisation/matplotlib.qmd index 9b8948d32a..bd5002fd39 100644 --- a/content/visualisation/matplotlib.qmd +++ b/content/visualisation/matplotlib.qmd @@ -11,6 +11,7 @@ description-en: | image: https://minio.lab.sspcloud.fr/lgaliana/generative-art/pythonds/drawing.png echo: false bibliography: ../../reference.bib +eval: false --- {{< badges