Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .github/workflows/colab.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Renders the course notebooks to .ipynb with Quarto and pushes them to the
# Colab-oriented notebooks repository.
name: Testing colab build

on:
  push:
    branches:
      - main
      - master
      - box

# Least privilege for the default GITHUB_TOKEN: this workflow only reads the
# repository. The cross-repository push authenticates with API_TOKEN_GITHUB.
permissions:
  contents: read

jobs:
  enonces:
    name: Render notebooks
    runs-on: ubuntu-latest
    container: linogaliana/python-datascientist:latest
    # Only `push` triggers this workflow, so `github.event.pull_request` is
    # empty here: the guard below is always true and the checkout `ref` is
    # always blank (checkout then uses the pushed ref). Both only become
    # meaningful if a `pull_request` trigger is added.
    if: ${{ !github.event.pull_request.head.repo.fork }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.ref }}
      - name: Configure safe.directory  # Workaround for actions/checkout#760
        run: |
          git config --global --add safe.directory /__w/python-datascientist/python-datascientist
          git config --global --add safe.directory /__w/python-datascientist/python-datascientist-notebooks
      - name: Show workspace and conda environment
        shell: bash
        run: |
          ls
          conda info
          conda list
      - name: Convert in ipynb with Quarto
        env:
          API_INPI_USERNAME: ${{ secrets.API_INPI_USERNAME }}
          API_INPI_PASSWORD: ${{ secrets.API_INPI_PASSWORD }}
        run: |
          export QUARTO_PROFILE=fr,en
          rm _quarto.yml
          cp _quarto-test.yml _quarto.yml
          rm content/modelisation/index.qmd # Remove file not building in ipynb
          python build/colab/tweak_quarto_project.py
          quarto render --to ipynb
          #quarto render --profile fr --to ipynb
          #quarto render --profile en --to ipynb
      - name: Move to expected directory
        # NOTE(review): the INPI credentials are presumably not needed to move
        # files; confirm against build/move_files.py before dropping them.
        env:
          API_INPI_USERNAME: ${{ secrets.API_INPI_USERNAME }}
          API_INPI_PASSWORD: ${{ secrets.API_INPI_PASSWORD }}
        run: |
          mkdir -p temp_notebooks
          mkdir -p temp_notebooks/notebooks
          python build/move_files.py --direction temp_notebooks/notebooks
      - name: Upload rendered sources
        uses: actions/upload-artifact@v4
        with:
          name: Source enonce
          path: content/
      - name: Upload notebooks
        uses: actions/upload-artifact@v4
        with:
          name: Enonces
          path: temp_notebooks/notebooks/
      - name: Pushes to another repository
        uses: linogaliana/github-action-push-to-another-repository@main
        env:
          API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
        with:
          source-directory: 'temp_notebooks/'
          destination-repository-username: 'linogaliana'
          destination-repository-name: 'python-datascientist-notebooks-colab'
          # NOTE(review): this address looks obfuscated by the page it was
          # copied from; restore the real committer e-mail. It is quoted so
          # YAML reads it as a string rather than a flow sequence.
          user-email: '[email protected]'
          destination-github-username: linogaliana
          #target-branch: test
          create-target-branch-if-needed: true
          reset-repo: true



37 changes: 0 additions & 37 deletions .github/workflows/prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -197,41 +197,4 @@ jobs:
#target-branch: test
create-target-branch-if-needed: true
reset-repo: true
# Builds the job matrix: a JSON array with the path of every notebook of the
# notebooks repository that should be executed by the `check` job.
define-matrix:
  runs-on: ubuntu-latest
  needs: enonces
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@v4
      with:
        repository: 'linogaliana/python-datascientist-notebooks'
    - name: Define matrix
      id: set-matrix
      # The `::set-output` workflow command is deprecated; step outputs are
      # written as `name=value` lines to the file named by $GITHUB_OUTPUT.
      # `jq -c` prints the array on a single line, so no multi-line delimiter
      # is required.
      run: |
        matrix="$(find . -type f -name "*.ipynb" \
          ! -name "_*" \
          ! -regex '.*/getting-started/.*' \
          ! -regex '.*/modelisation/index.*' \
          ! -regex '.*/git/.*' \
          ! -regex '.*/modern-ds/.*' \
          ! -regex '.*/manipulation/04a_webscraping_TP.*' \
          | jq -R -s -c 'split("\n")[:-1]')"
        echo "matrix=${matrix}" >> "$GITHUB_OUTPUT"

# Executes each notebook listed by `define-matrix`, one matrix job per file.
check:
  needs: define-matrix
  runs-on: ubuntu-latest
  container: linogaliana/python-datascientist:latest
  # A notebook that fails to execute must not fail the whole workflow.
  continue-on-error: true
  strategy:
    matrix:
      manifest: ${{ fromJson(needs.define-matrix.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v4
      with:
        repository: 'linogaliana/python-datascientist-notebooks'
    - name: Execute notebook
      # The path is handed over through an environment variable and quoted
      # instead of being interpolated into the script text: file names with
      # spaces or shell metacharacters are then treated as a single argument.
      env:
        NOTEBOOK: ${{ matrix.manifest }}
      run: |
        quarto render "$NOTEBOOK" --execute



98 changes: 98 additions & 0 deletions _quarto-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Reduced Quarto project used to test the notebook (ipynb) build: only the
# pages listed under `project.render` are rendered.
project:
  type: website
  render:
    - index.qmd
    - 404.qmd
    - content/getting-started/index.qmd
    - content/manipulation/index.qmd
    - content/visualisation/index.qmd
    - content/getting-started/01_environment.qmd
    - content/modelisation/index.qmd
    - content/NLP/index.qmd
    # Each annex is listed once (evaluation/corrections used to be duplicated).
    - content/annexes/corrections.qmd
    - content/annexes/evaluation.qmd
    - content/git/*.qmd
    - content/annexes/about.qmd

profile:
  default: fr
  group: [fr, en]

execute:
  cache: true

# WEBSITE ARCHITECTURE ---------------------

website:
  page-navigation: true
  back-to-top-navigation: true
  reader-mode: true
  navbar:
    background: "white"
    search: true
    title: false
    left:
      - file: index.qmd
        text: Home
      - sidebar:introduction
      - sidebar:manipulation
      - sidebar:communication
      - sidebar:modelisation
      - sidebar:NLP
      - sidebar:modern
      - sidebar:git
      - sidebar:appendix
    tools:
      - icon: github
        href: https://github.com/linogaliana/python-datascientist
  comments:
    giscus:
      repo: linogaliana/python-datascientist
  twitter-card: true
  site-url: https://pythonds.linogaliana.fr
  repo-url: https://github.com/linogaliana/python-datascientist
  repo-branch: main
  issue-url: https://github.com/linogaliana/python-datascientist/issues/new
  repo-actions: [edit, issue]


format:
  html:
    theme:
      light: [lightly, styles/custom.scss, styles/custom-light.scss]
      dark: [darkly, styles/custom.scss, styles/custom-dark.scss]
    css: styles/styles.css
    toc: true
    code-overflow: wrap
    include-in-header:
      - build/toggle.js
  ipynb: default


# PAGE OPTIONS ---------------------

filters:
  - build/replace-title.lua
  - build/lang-notebook.lua
  #- black-formatter
  - include-code-files

crossref:
  chapters: true

author: Lino Galiana
date: today
date-format: iso
page-layout: article
title-block-banner: "#e9f3fa"
number-sections: true
wrap: preserve
format-links: false
validate-yaml: false
keep-ipynb: true
lightbox: auto
google-scholar: true
commentable: true

143 changes: 143 additions & 0 deletions build/colab/callout_colab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import os
import re
import markdown
from loguru import logger


def create_python_snippet(title, content, callout_type):
    """
    Build a Quarto Python code cell that displays a styled HTML callout box.

    The stylesheet is read from ``./build/colab/colab.css`` and embedded in a
    ``<style>`` element; ``content`` is converted from Markdown to HTML.

    Args:
        title (str): The title of the callout box.
        content (str): The main content of the callout box, as Markdown.
        callout_type (str): The type of callout (e.g., 'note', 'caution', 'warning').
            It is used to build the ``callout-<type>`` CSS class.

    Returns:
        str: A fenced ``{python}`` code block which, when executed, renders the
        stylesheet and the callout through ``IPython.display.HTML``.

    Raises:
        FileNotFoundError: If the CSS file does not exist.
    """

    css_file_path = "./build/colab/colab.css"
    if not os.path.exists(css_file_path):
        raise FileNotFoundError(f"{css_file_path} not found. Please ensure the file exists.")

    with open(css_file_path, "r", encoding="utf-8") as css_file:
        css_rules = css_file.read()

    # This must be an f-string: without the prefix the CSS read above is thrown
    # away and the literal text "{style}" ends up inside the <style> element.
    style = f"""
<style>
{css_rules}
</style>
"""

    content_html = f"""
<div class="callout callout-{callout_type}">
<div class="callout-header">
{title}
</div>
<div class="callout-body">
{markdown.markdown(content)}
</div>
</div>
"""

    # The generated cell stores both fragments in triple-single-quoted strings,
    # so neither the CSS nor the callout text may itself contain ''' .
    full_html = (
        "\n"
        "```{python}\n"
        "from IPython.display import HTML\n"
        f"style = '''\n{style}\n'''\n"
        f"content_html = '''\n{content_html}\n'''\n"
        'HTML(f"{style}\\n{content_html}")\n'
        "\n```"
        "\n"
    )
    return full_html


def substitute_snippets(content, regex):
    """
    Replace every block matched by ``regex`` with a generated Python snippet.

    Args:
        content (str): Original text content.
        regex (re.Pattern): Compiled pattern whose first capture group holds
            the text found inside a block.

    Returns:
        str: ``content`` where each match has been replaced by the output of
        ``create_python_snippet``.
    """

    def _render_block(block_match):
        # The callout type is the first ".word" found in the matched text;
        # "note" is used when there is none.
        found_type = re.search(r"\.(\w+)", block_match.group(0))
        kind = "note" if found_type is None else found_type.group(1)

        # Text captured between the opening and closing fences.
        body = block_match.group(1).strip()

        # The first line starting with '##' is taken as the title and removed
        # from the body; otherwise the capitalized callout type is the title.
        heading = re.search(r"^##\s*(.*)", body, re.MULTILINE)
        if heading is None:
            label = kind.capitalize()
        else:
            label = heading.group(1).strip()
            body = re.sub(r"^##\s*.*", "", body, count=1, flags=re.MULTILINE).strip()

        return create_python_snippet(title=label, content=body, callout_type=kind)

    return regex.sub(_render_block, content)


def process_file(input_file_path, regex_pattern, output_file_path=None):
    """
    Reads a file, performs snippet substitutions, and writes the updated content to a new file.

    Args:
        input_file_path (str): Path to the input file.
        regex_pattern (str): Regex pattern to identify content blocks. It is
            compiled with ``re.MULTILINE`` and its first capture group must
            hold the inner content of a block.
        output_file_path (str, optional): Path of the file to write. When
            omitted, ``_modified`` is inserted before the extension of
            ``input_file_path`` (``page.qmd`` -> ``page_modified.qmd``).

    Returns:
        None. If the input file does not exist an error is logged and nothing
        is written.
    """

    if output_file_path is None:
        # Only the final extension is touched: a blanket str.replace would also
        # rewrite ".qmd" inside directory names, and would leave the path
        # unchanged (overwriting the input) when there is no ".qmd" at all.
        stem, extension = os.path.splitext(input_file_path)
        output_file_path = f"{stem}_modified{extension}"

    # Check if the input file exists
    if not os.path.exists(input_file_path):
        logger.error(f"Input file does not exist: {input_file_path}")
        return None

    # Read the content of the input file (explicit UTF-8 so accented text does
    # not depend on the platform's default encoding)
    logger.info(f"Reading content from {input_file_path}")
    with open(input_file_path, "r", encoding="utf-8") as file:
        original_content = file.read()

    # Compile the regex pattern
    filtered_div_regex = re.compile(regex_pattern, re.MULTILINE)

    # Perform the substitution
    logger.info("Performing substitution of snippets.")
    updated_content_with_snippets = substitute_snippets(
        original_content, filtered_div_regex
    )

    # Write the modified content to the output file
    logger.info(f"Writing updated content to {output_file_path}")
    with open(output_file_path, "w", encoding="utf-8") as file:
        file.write(updated_content_with_snippets)

    logger.success(f"Modified content written to {output_file_path}")


# Example usage: convert the callout divs of one chapter when run as a script.
if __name__ == "__main__":
    # Matches a fenced div whose only attribute is one of the listed callout
    # classes and captures everything up to the next ":::".
    callout_div_pattern = r":::\s*\{(?:\.note|\.caution|\.warning|\.important|\.tip|\.exercise)\}([\s\S]*?):::"
    process_file(
        input_file_path="./content/getting-started/01_environment.qmd",
        regex_pattern=callout_div_pattern,
    )
Loading
Loading