1+ """
2+ syllabus_table.py
3+
4+ Charge doc/syllabus.json (liste de dicts), enrichit puis
5+ retourne un DataFrame Polars prêt à être affiché avec great_tables.
6+ """
7+
8+ import re
9+ import json
10+ from pathlib import Path
11+ from typing import Optional , Dict , Mapping , Union
12+ import frontmatter
13+
14+ import polars as pl
15+ import gt_extras as gte
16+ from great_tables import *
17+
# Maps the folder name found right after "content/" in a syllabus file path
# to the human-readable section title used to group the syllabus tables.
DEFAULT_SECTION_MAP: Dict[str, str] = {
    "getting-started": "Introduction",
    "manipulation": "Manipuler des données",
    "visualisation": "Communiquer",
    "modelisation": "Modéliser",
    "NLP": "Natural Language Processing",
    "git": "Git",
}
26+
27+
def build_syllabus_pipeline(
    syllabus_json_path: Union[str, Path] = "doc/syllabus.json",
    section_map: Mapping[str, str] = DEFAULT_SECTION_MAP,
    active_color: str = "#E16462",
    inactive_color: str = "#E9ECEF",
    lang: str = "fr",
    project_root: Union[str, Path] = ".",
) -> Dict[str, pl.DataFrame]:
    """
    Build the per-section syllabus tables.

    Parameters
    ----------
    syllabus_json_path:
        Path to the syllabus JSON file, forwarded to `build_syllabus_df`.
    section_map:
        Mapping {part: section}, forwarded to `build_syllabus_df`.
    active_color:
        Fill colour of the "active" icons.
    inactive_color:
        Fill colour of the "inactive" icons.
    lang:
        Language forwarded to `add_titles_from_qmd` to pick the title.
    project_root:
        Directory against which the `file` paths are resolved when the
        .qmd front matter is read. Defaults to the current directory,
        which is what was previously hard-coded.

    Returns
    -------
    Dict[str, pl.DataFrame]
        One DataFrame per section, keyed by section name, in order of first
        appearance of each section in the syllabus.
    """
    df_prepared = build_syllabus_df(
        syllabus_json_path=syllabus_json_path,
        section_map=section_map,
        active_color=active_color,
        inactive_color=inactive_color,
    )

    df_prepared = add_titles_from_qmd(
        df_prepared, project_root=project_root, lang=lang
    )

    # Add the column of available languages (flag codes built by capture_lang).
    df_prepared = add_langs_from_qmd(df_prepared, project_root=project_root)

    df_prepared = df_prepared.select(
        [
            "title_link",
            "section",
            "langs",
            "classroom_icon",
            "classroom",
            "material_icon",
            "type",
        ]
    )

    # maintain_order=True keeps the groups in input order; without it the
    # order of the sections in the returned dict is not guaranteed.
    # Group keys are tuples, hence key[0].
    return {
        key[0]: section_df
        for key, section_df in df_prepared.group_by("section", maintain_order=True)
    }
55+
56+
def add_langs_from_qmd(
    df: pl.DataFrame,
    project_root: Union[str, Path] = ".",
    file_col: str = "file",
    langs_col: str = "langs",
) -> pl.DataFrame:
    """
    Add a column holding the languages available for each syllabus file.

    Each value of `file_col` is stripped of its leading slashes, joined to
    `project_root` and passed to `capture_lang`; the result is stored in
    `langs_col`. The temporary normalised-path column is dropped before
    returning.
    """
    base_dir = Path(project_root)

    def _langs_for(relative_path):
        return capture_lang(base_dir / relative_path)

    # "/content/..." -> "content/..."
    normalized_path = pl.col(file_col).cast(pl.Utf8).str.replace(r"^/+", "")
    langs_expr = (
        pl.col("_file_norm")
        .map_elements(_langs_for, return_dtype=pl.Utf8)
        .alias(langs_col)
    )

    with_normalized = df.with_columns(_file_norm=normalized_path)
    return with_normalized.with_columns(langs_expr).drop("_file_norm")
77+
78+
def build_syllabus_df(
    syllabus_json_path: Union[str, Path] = "doc/syllabus.json",
    section_map: Mapping[str, str] = DEFAULT_SECTION_MAP,
    active_color: str = "#E16462",
    inactive_color: str = "#E9ECEF",
) -> pl.DataFrame:
    """
    Read a syllabus JSON file and return an enriched Polars DataFrame.

    Parameters
    ----------
    syllabus_json_path:
        Path to the JSON file (defaults to "doc/syllabus.json").
    section_map:
        Mapping {part: section}; `part` is the folder right after content/.
        Parts missing from the mapping get the section "Autre".
    active_color:
        Fill colour of the "active" icons.
    inactive_color:
        Fill colour of the "inactive" icons (only used for classroom_icon).

    Returns
    -------
    pl.DataFrame
        The JSON records plus the columns part, section, classroom_icon,
        material_icon and type. The `classroom` column is rewritten into a
        display label ("Self learning" or "<classroom> or self learning").

    Raises
    ------
    ValueError
        If the JSON document is not a list, or if one of its items is not
        a dictionary.
    """
    path = Path(syllabus_json_path)

    with path.open("r", encoding="utf-8") as f:
        syllabus = json.load(f)

    # The message promises list[dict], so check the items as well as the
    # container instead of letting a malformed entry fail later in Polars.
    if not isinstance(syllabus, list) or not all(
        isinstance(item, dict) for item in syllabus
    ):
        raise ValueError("Le JSON attendu doit être une liste de dictionnaires (list[dict]).")

    df = pl.DataFrame(syllabus)

    def _classroom_icon(is_taught):
        return gte.fa_icon_repeat(
            name="chalkboard-user",
            fill=active_color if is_taught else inactive_color,
        )

    def _material_icon(has_exercise):
        return gte.fa_icon_repeat(
            name="laptop-code" if has_exercise else "book-open-reader",
            fill=active_color,
        )

    # 1) Extract the "part" from /content/<part>/
    df = df.with_columns(
        part=pl.col("file").cast(pl.Utf8).str.extract(r"^/?content/([^/]+)/", 1)
    )

    # 2) Map it to a readable section name.
    # NOTE(review): Polars >= 1.0 documents `replace(..., default=...)` as
    # deprecated in favour of `replace_strict`; confirm the pinned Polars
    # version before migrating.
    df = df.with_columns(
        section=pl.col("part").replace(section_map, default="Autre")
    )

    # 3) Classroom icon: active when `classroom` is not null. This must run
    #    before `classroom` is overwritten with its display label below.
    df = df.with_columns(
        classroom_icon=pl.col("classroom")
        .is_not_null()
        .map_elements(_classroom_icon, return_dtype=pl.String)
    )
    df = df.with_columns(
        classroom=pl.when(pl.col("classroom").is_null())
        .then(pl.lit("Self learning"))
        .otherwise(pl.concat_str([pl.col("classroom"), pl.lit(" or self learning")]))
    )

    # 4) Material icon and label: a null `exercise` is treated as False.
    has_exercise = pl.col("exercise").fill_null(False)
    df = df.with_columns(
        material_icon=has_exercise.map_elements(
            _material_icon, return_dtype=pl.String
        )
    )
    df = df.with_columns(
        type=pl.when(has_exercise)
        .then(pl.lit("Read from website and exercise notebooks"))
        .otherwise(pl.lit("Read from website"))
    )

    return df
163+
164+
165+
166+ def _normalize_title (title : Optional [str ]) -> Optional [str ]:
167+ """Règles de normalisation de titre."""
168+ if not title :
169+ return None
170+
171+ title = title .strip ()
172+
173+ # "Partie ..." -> "Introduction à la partie ..."
174+ if re .match (r"^\s*Partie\b" , title ):
175+ title = re .sub (r"^\s*Partie\b" , "Introduction à la partie" , title , count = 1 )
176+
177+ # "Partie ..." -> "Introduction à la partie ..."
178+ if re .match (r"^\s*Part\b" , title ):
179+ title = re .sub (r"^\s*Part\b" , "Introduction to part" , title , count = 1 )
180+
181+
182+ return title
183+
184+
def capture_lang(path: Path) -> Optional[str]:
    """
    Report which languages a file's YAML front matter provides a title for.

    Returns:
        - "FR" when only `title` is a non-blank string
        - "US" when only `title-en` is a non-blank string
        - "FR,US" when both are (FR first, then US)
        - None when neither is, when the file does not exist, or when the
          front matter cannot be loaded
    """
    if not path.exists():
        return None

    try:
        post = frontmatter.load(path)
    except Exception:
        return None

    def _has_text(key):
        value = post.get(key)
        return isinstance(value, str) and value.strip() != ""

    codes = []
    if _has_text("title"):
        codes.append("FR")
    if _has_text("title-en"):
        codes.append("US")

    return ",".join(codes) if codes else None
213+
214+ def _extract_title_from_qmd (
215+ path : Path ,
216+ lang : str = "fr"
217+ ) -> Optional [str ]:
218+ """Extrait `title` du YAML front matter d'un .qmd via python-frontmatter."""
219+ if not path .exists ():
220+ return None
221+
222+ try :
223+ post = frontmatter .load (path )
224+ except Exception :
225+ return None
226+
227+ title_location = "title" if lang == "fr" else "title-en"
228+ title = post .get (title_location , post .get ("title" ))
229+ return _normalize_title (title ) if isinstance (title , str ) else None
230+
231+
232+ def _md_link (label : Optional [str ], href : str ) -> Optional [str ]:
233+ if not label :
234+ return None
235+ # Évite de casser le markdown si jamais le titre contient des crochets
236+ safe_label = label .replace ("[" , "\\ [" ).replace ("]" , "\\ ]" )
237+ return f"[{ safe_label } ]({ href } )"
238+
239+
def add_titles_from_qmd(
    df: pl.DataFrame,
    project_root: Union[str, Path] = ".",
    file_col: str = "file",
    title_col: str = "title",
    link_col: str = "title_link",
    lang: str = "fr"
) -> pl.DataFrame:
    """
    Add two columns derived from each file's front matter:
    - `title_col`: normalised title (str or None)
    - `link_col`: markdown link "[title](path)" (str or None)

    `path` is the value of `file_col` without its leading slashes; the same
    value, joined to `project_root`, locates the file to read.
    """
    base_dir = Path(project_root)

    def _title_for(relative_path):
        return _extract_title_from_qmd(base_dir / relative_path, lang)

    def _link_for(row):
        return _md_link(row[title_col], row["_file_norm"])

    # "/content/..." -> "content/..."
    normalized_path = pl.col(file_col).cast(pl.Utf8).str.replace(r"^/+", "")
    title_expr = (
        pl.col("_file_norm")
        .map_elements(_title_for, return_dtype=pl.Utf8)
        .alias(title_col)
    )
    link_expr = (
        pl.struct(["_file_norm", title_col])
        .map_elements(_link_for, return_dtype=pl.Utf8)
        .alias(link_col)
    )

    # Three separate steps: each expression reads a column created by the
    # previous one.
    return (
        df.with_columns(_file_norm=normalized_path)
        .with_columns(title_expr)
        .with_columns(link_expr)
        .drop("_file_norm")
    )
279+
280+
def build_syllabus_gt(
    df_section: pl.DataFrame,
    dark_theme: bool = False,
) -> GT:
    """
    Build a great_tables table (GT) for one section of the syllabus.

    Columns expected in df_section:
    - section (hidden)
    - title_link (rendered as markdown)
    - langs (rendered with fmt_flag)
    - classroom_icon, classroom (combined through gte.gt_merge_stack)
    - material_icon, type (combined through gte.gt_merge_stack)

    When `dark_theme` is true, gte.gt_theme_dark is applied last.
    """
    header_labels = {
        "title_link": md("**Title**"),
        "classroom_icon": md("**Learning mode**"),
        "material_icon": md("**Resource type**"),
        "langs": md("**Available into**"),
    }
    stacked_pairs = (
        ("classroom_icon", "classroom"),
        ("material_icon", "type"),
    )

    table = GT(df_section).cols_hide(columns="section")
    table = table.fmt_markdown("title_link").fmt_flag(columns="langs")
    table = table.cols_label(header_labels).cols_width({"title_link": "60%"})

    for icon_col, text_col in stacked_pairs:
        table = table.pipe(gte.gt_merge_stack, col1=icon_col, col2=text_col)

    if not dark_theme:
        return table
    return table.pipe(gte.gt_theme_dark)
0 commit comments