Skip to content

Commit fe573ec

Browse files
authored
Un syllabus sous la forme d'un joli tableau (#667)
Inspiration: https://datavizf25.classes.andrewheiss.com/schedule.html
1 parent cf50200 commit fe573ec

File tree

15 files changed

+580
-172
lines changed

15 files changed

+580
-172
lines changed

_quarto-prod.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ project:
3636
- content/NLP/01_intro.qmd
3737
- content/NLP/02_exoclean.qmd
3838
- content/NLP/03_embedding.qmd
39-
- content/modern-ds/s3.qmd
4039
- content/git/index.qmd
4140
- content/git/introgit.qmd
4241
- content/git/exogit.qmd

_quarto.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ project:
33
render:
44
- index.qmd
55
- 404.qmd
6+
- content/getting-started/index.qmd
67
- content/getting-started/03_revisions.qmd
7-
- content/modelisation/5_clustering.qmd
88

99
profile:
1010
default: fr

build/homepage/syllabus.py

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
"""
2+
syllabus.py
3+
4+
Charge doc/syllabus.json (liste de dicts), enrichit puis
5+
retourne un DataFrame Polars prêt à être affiché avec great_tables.
6+
"""
7+
8+
import re
9+
import json
10+
from pathlib import Path
11+
from typing import Optional, Dict, Mapping, Union
12+
import frontmatter
13+
14+
import polars as pl
15+
import gt_extras as gte
16+
from great_tables import *
17+
18+
# Maps the folder name found right after "content/" in a chapter path to the
# human-readable section label used to group the syllabus tables.
DEFAULT_SECTION_MAP: Dict[str, str] = dict(
    (
        ("getting-started", "Introduction"),
        ("manipulation", "Manipuler des données"),
        ("visualisation", "Communiquer"),
        ("modelisation", "Modéliser"),
        ("NLP", "Natural Language Processing"),
        ("git", "Git"),
    )
)
26+
27+
28+
def build_syllabus_pipeline(
    syllabus_json_path: Union[str, Path] = "doc/syllabus.json",
    section_map: Mapping[str, str] = DEFAULT_SECTION_MAP,
    active_color: str = "#E16462",
    inactive_color: str = "#E9ECEF",
    lang: str = "fr",
    project_root: Union[str, Path] = ".",
) -> Dict[str, pl.DataFrame]:
    """
    Build one display-ready DataFrame per syllabus section.

    Parameters
    ----------
    syllabus_json_path:
        Path to the syllabus JSON file, forwarded to `build_syllabus_df`.
    section_map:
        Mapping {part: section label}, forwarded to `build_syllabus_df`.
    active_color:
        Fill color for "active" icons, forwarded to `build_syllabus_df`.
    inactive_color:
        Fill color for "inactive" icons, forwarded to `build_syllabus_df`.
    lang:
        Language used to pick chapter titles, forwarded to
        `add_titles_from_qmd`.
    project_root:
        Directory against which the `file` paths are resolved when the .qmd
        front matter is read. Defaults to the current directory, which was
        previously hard-coded.

    Returns
    -------
    Dict[str, pl.DataFrame]
        {section label: rows of that section}, restricted to the columns
        title_link, section, langs, classroom_icon, classroom,
        material_icon and type. Sections appear in the order in which they
        are first encountered in the syllabus.
    """
    df_prepared = build_syllabus_df(
        syllabus_json_path=syllabus_json_path,
        section_map=section_map,
        active_color=active_color,
        inactive_color=inactive_color,
    )

    df_prepared = add_titles_from_qmd(
        df_prepared, project_root=project_root, lang=lang
    )

    # Column listing the languages in which each chapter is available.
    df_prepared = add_langs_from_qmd(df_prepared, project_root=project_root)

    df_prepared = df_prepared.select(
        [
            "title_link",
            "section",
            "langs",
            "classroom_icon",
            "classroom",
            "material_icon",
            "type",
        ]
    )

    # maintain_order=True keeps the sections in syllabus order; without it the
    # group order (and therefore the dict order) can change between runs.
    # Iterating a group_by yields (key_tuple, sub_frame); the key has a single
    # element because we group on one column.
    return {
        key[0]: section_df
        for key, section_df in df_prepared.group_by("section", maintain_order=True)
    }
55+
56+
57+
def add_langs_from_qmd(
    df: pl.DataFrame,
    project_root: Union[str, Path] = ".",
    file_col: str = "file",
    langs_col: str = "langs",
) -> pl.DataFrame:
    """
    Add a column listing the languages available for each chapter.

    Each value of `file_col` is stripped of its leading slashes, resolved
    against `project_root` and passed to `capture_lang`; the result is stored
    in `langs_col`. The temporary normalized-path column is dropped before
    returning.
    """
    base_dir = Path(project_root)
    tmp_col = "_file_norm"

    # "/content/..." -> "content/..." so the path can be joined to base_dir.
    relative_path = pl.col(file_col).cast(pl.Utf8).str.replace(r"^/+", "")
    with_paths = df.with_columns(relative_path.alias(tmp_col))

    langs_expr = pl.col(tmp_col).map_elements(
        lambda rel: capture_lang(base_dir / rel),
        return_dtype=pl.Utf8,
    )

    return with_paths.with_columns(langs_expr.alias(langs_col)).drop(tmp_col)
77+
78+
79+
def build_syllabus_df(
    syllabus_json_path: Union[str, Path] = "doc/syllabus.json",
    section_map: Mapping[str, str] = DEFAULT_SECTION_MAP,
    active_color: str = "#E16462",
    inactive_color: str = "#E9ECEF",
) -> pl.DataFrame:
    """
    Read a syllabus JSON file and return an enriched Polars DataFrame.

    Parameters
    ----------
    syllabus_json_path:
        Path to the JSON file (defaults to "doc/syllabus.json").
    section_map:
        Mapping {part: section}, where part is the folder right after
        "content/" in the `file` path. Parts missing from the mapping are
        labelled "Autre".
    active_color:
        Fill color used for "active" icons.
    inactive_color:
        Fill color used for "inactive" icons (classroom_icon only).

    Returns
    -------
    pl.DataFrame
        DataFrame with the input columns (file, classroom, exercise are read
        here) plus part, section, classroom_icon, material_icon and type.
        The `classroom` column is rewritten into a display label.

    Raises
    ------
    ValueError
        If the top-level JSON value is not a list.
    """
    path = Path(syllabus_json_path)

    with path.open("r", encoding="utf-8") as f:
        syllabus = json.load(f)

    if not isinstance(syllabus, list):
        raise ValueError("Le JSON attendu doit être une liste de dictionnaires (list[dict]).")

    df = pl.DataFrame(syllabus)

    # 1) Extract the "part" from content/<part>/ (leading slash optional).
    df = df.with_columns(
        part=pl.col("file").cast(pl.Utf8).str.extract(r"^/?content/([^/]+)/", 1)
    )

    # 2) Map the part to a readable section label. `replace_strict` is the
    #    non-deprecated spelling of `replace(..., default=...)` in polars >= 1.0.
    df = df.with_columns(
        section=pl.col("part").replace_strict(section_map, default="Autre")
    )

    # 3) Classroom icon: active color when `classroom` is not null. This must
    #    run before `classroom` is overwritten with its display label below.
    df = df.with_columns(
        classroom_icon=pl.col("classroom")
        .is_not_null()
        .map_elements(
            lambda is_taught: gte.fa_icon_repeat(
                name="chalkboard-user",
                fill=active_color if is_taught else inactive_color,
            ),
            return_dtype=pl.String,
        )
    )
    df = df.with_columns(
        classroom=pl.when(pl.col("classroom").is_null())
        .then(pl.lit("Self learning"))
        .otherwise(pl.concat_str([pl.col("classroom"), pl.lit(" or self learning")]))
    )

    # 4) Material icon: laptop-code when `exercise` is true, book-open-reader
    #    otherwise (always drawn in the active color). Null counts as False.
    has_exercise = pl.col("exercise").fill_null(False)
    df = df.with_columns(
        material_icon=has_exercise.map_elements(
            lambda with_notebook: gte.fa_icon_repeat(
                name="laptop-code" if with_notebook else "book-open-reader",
                fill=active_color,
            ),
            return_dtype=pl.String,
        )
    )

    # 5) Text label shown next to the material icon.
    df = df.with_columns(
        type=pl.when(has_exercise)
        .then(pl.lit("Read from website and exercise notebooks"))
        .otherwise(pl.lit("Read from website"))
    )

    return df
163+
164+
165+
166+
def _normalize_title(title: Optional[str]) -> Optional[str]:
167+
"""Règles de normalisation de titre."""
168+
if not title:
169+
return None
170+
171+
title = title.strip()
172+
173+
# "Partie ..." -> "Introduction à la partie ..."
174+
if re.match(r"^\s*Partie\b", title):
175+
title = re.sub(r"^\s*Partie\b", "Introduction à la partie", title, count=1)
176+
177+
# "Partie ..." -> "Introduction à la partie ..."
178+
if re.match(r"^\s*Part\b", title):
179+
title = re.sub(r"^\s*Part\b", "Introduction to part", title, count=1)
180+
181+
182+
return title
183+
184+
185+
def capture_lang(path: Path) -> Optional[str]:
    """
    Detect which languages are available from the YAML front matter.

    The returned codes are the ones actually emitted by this function
    (the English version is reported as "US", not "UK"):

    - "FR" if only `title` is a non-blank string
    - "US" if only `title-en` is a non-blank string
    - "FR,US" if both are present (FR first, then US)
    - None if neither is present, or if the file is missing or unreadable
    """
    if not path.exists():
        return None

    try:
        post = frontmatter.load(path)
    except Exception:
        # Deliberate best-effort: a chapter whose front matter cannot be
        # parsed simply gets no language information.
        return None

    def _has_text(key: str) -> bool:
        """Return True when `key` holds a non-blank string in the front matter."""
        value = post.get(key)
        return isinstance(value, str) and value.strip() != ""

    codes = []
    if _has_text("title"):
        codes.append("FR")
    if _has_text("title-en"):
        codes.append("US")

    # An empty list means no usable title at all -> None.
    return ",".join(codes) or None
213+
214+
def _extract_title_from_qmd(
215+
path: Path,
216+
lang: str = "fr"
217+
) -> Optional[str]:
218+
"""Extrait `title` du YAML front matter d'un .qmd via python-frontmatter."""
219+
if not path.exists():
220+
return None
221+
222+
try:
223+
post = frontmatter.load(path)
224+
except Exception:
225+
return None
226+
227+
title_location = "title" if lang == "fr" else "title-en"
228+
title = post.get(title_location, post.get("title"))
229+
return _normalize_title(title) if isinstance(title, str) else None
230+
231+
232+
def _md_link(label: Optional[str], href: str) -> Optional[str]:
233+
if not label:
234+
return None
235+
# Évite de casser le markdown si jamais le titre contient des crochets
236+
safe_label = label.replace("[", "\\[").replace("]", "\\]")
237+
return f"[{safe_label}]({href})"
238+
239+
240+
def add_titles_from_qmd(
    df: pl.DataFrame,
    project_root: Union[str, Path] = ".",
    file_col: str = "file",
    title_col: str = "title",
    link_col: str = "title_link",
    lang: str = "fr"
) -> pl.DataFrame:
    """
    Add two columns derived from each chapter's .qmd front matter:

    - `title_col`: normalized title (str or None)
    - `link_col`: markdown link "[title](path)" (str or None)

    `path` is the `file_col` value with its leading slashes removed; the same
    relative path, joined to `project_root`, is the file that gets read.
    """
    base_dir = Path(project_root)
    tmp_col = "_file_norm"

    def _title_for(rel_path):
        # Read the title of the chapter located at base_dir / rel_path.
        return _extract_title_from_qmd(base_dir / rel_path, lang)

    def _link_for(row):
        # `row` is a struct holding the normalized path and the title.
        return _md_link(row[title_col], row[tmp_col])

    # Normalize paths: "/content/..." -> "content/..."
    stripped_path = pl.col(file_col).cast(pl.Utf8).str.replace(r"^/+", "")
    normalized = df.with_columns(stripped_path.alias(tmp_col))

    # Title extraction
    title_expr = pl.col(tmp_col).map_elements(_title_for, return_dtype=pl.Utf8)
    titled = normalized.with_columns(title_expr.alias(title_col))

    # Markdown link [title](path)
    link_expr = pl.struct([tmp_col, title_col]).map_elements(
        _link_for,
        return_dtype=pl.Utf8,
    )
    linked = titled.with_columns(link_expr.alias(link_col))

    return linked.drop(tmp_col)
279+
280+
281+
def build_syllabus_gt(
    df_section: pl.DataFrame,
    dark_theme: bool = False,
) -> GT:
    """
    Build a great_tables (GT) table for one syllabus section.

    Columns read from df_section:
    - section (hidden)
    - title_link (rendered as markdown)
    - langs (rendered as flags)
    - classroom_icon, classroom (stacked in one cell)
    - material_icon, type (stacked in one cell)

    When `dark_theme` is True, the gt_extras dark theme is applied last.
    """
    column_labels = {
        "title_link": md("**Title**"),
        "classroom_icon": md("**Learning mode**"),
        "material_icon": md("**Resource type**"),
        "langs": md("**Available into**")
    }
    # Each pair is merged into one stacked cell: icon column first, then text.
    stacked_pairs = (
        ("classroom_icon", "classroom"),
        ("material_icon", "type"),
    )

    table = GT(df_section).cols_hide(columns="section")
    table = table.fmt_markdown("title_link").fmt_flag(columns="langs")
    table = table.cols_label(column_labels).cols_width({"title_link": "60%"})

    for icon_col, text_col in stacked_pairs:
        table = table.pipe(gte.gt_merge_stack, col1=icon_col, col2=text_col)

    return table.pipe(gte.gt_theme_dark) if dark_theme else table

0 commit comments

Comments
 (0)