lhoestq HF staff committed on
Commit
2579f7c
·
1 Parent(s): 0b712fa

hide params on reclick

Browse files
Files changed (1) hide show
  1. app.py +28 -68
app.py CHANGED
@@ -78,52 +78,9 @@ DEFAULT_CODE = dedent(
78
 
79
  make_gallery_image_buttons_js = """
80
  function load() {
81
- class ClassWatcher {
82
-
83
- constructor(targetNode, classToWatch, classAddedCallback, arg) {
84
- this.targetNode = targetNode
85
- this.classToWatch = classToWatch
86
- this.classAddedCallback = classAddedCallback
87
- this.arg = arg
88
- this.observer = null
89
- this.lastClassState = targetNode.classList.contains(this.classToWatch)
90
-
91
- this.init()
92
- }
93
-
94
- init() {
95
- this.observer = new MutationObserver(this.mutationCallback)
96
- this.observe()
97
- }
98
-
99
- observe() {
100
- this.observer.observe(this.targetNode, { attributes: true })
101
- }
102
-
103
- disconnect() {
104
- this.observer.disconnect()
105
- }
106
-
107
- mutationCallback = mutationsList => {
108
- for (let mutation of mutationsList) {
109
- if (mutation.type === 'attributes' && mutation.attributeName === 'class') {
110
- let currentClassState = mutation.target.classList.contains(this.classToWatch)
111
- if(this.lastClassState !== currentClassState) {
112
- this.lastClassState = currentClassState
113
- if(currentClassState) {
114
- this.classAddedCallback(this.arg)
115
- }
116
- }
117
- }
118
- }
119
- }
120
- }
121
  let buttons = document.getElementsByClassName("block-button");
122
- function clickButton(i) {
123
- buttons[i].click();
124
- }
125
  Array.from(document.getElementById("pipeline-gallery").getElementsByClassName("thumbnail-item")).map(
126
- (b, i) => new ClassWatcher(b, 'selected', clickButton, i)
127
  )
128
  }
129
  """
@@ -147,7 +104,7 @@ tr td {
147
  min-height: 600px;
148
  max-height: 600px;
149
  }
150
- .excluded_tabs .tab-wrapper .tab-container {
151
  overflow: scroll;
152
  }
153
  """
@@ -164,10 +121,10 @@ def non_empty_list_or_none(input_list: list[str]) -> Optional[list[str]]:
164
 
165
 
166
  with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
167
- state = gr.State({"selected_block": 0})
168
  gr.Markdown("# Common Crawl Pipeline Creator")
169
  with gr.Row():
170
- with gr.Column():
171
  gallery = gr.Gallery(
172
  blocks,
173
  columns=4,
@@ -344,28 +301,31 @@ with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
344
  ]
345
 
346
  with gr.Column():
347
- with gr.Tab("Output") as output_tab:
348
- output_dataframe = gr.DataFrame(datatype="markdown")
349
- with gr.Tab("Excluded") as excluded_tab:
350
- with gr.Tabs(elem_classes="excluded_tabs"):
351
- excluded_dataframes: dict[Type, gr.DataFrame] = {}
352
- excluded_tabs: dict[Type, gr.Tab] = {}
353
- for step in steps:
354
- if issubclass(step, BaseFilter) and step is not URLFilter:
355
- with gr.Tab(step.__name__) as t:
356
- excluded_dataframes[step] = gr.DataFrame(datatype="markdown")
357
- excluded_tabs[step] = t
358
- with gr.Tab("Python code") as code_tab:
359
- python_code_markdown = gr.Markdown(DEFAULT_CODE)
 
360
 
361
 
362
  gr.Markdown("_powered by [datatrove](https://github.com/huggingface/datatrove)_")
363
 
364
- def show_block_ui(i):
 
 
365
  return {**{block_ui: gr.Column(visible=(j == i)) for j, block_ui in enumerate(blocks_uis)}, state: {"selected_block": i}}
366
 
367
  for i, button in enumerate(gallery_image_buttons):
368
- button.click(partial(show_block_ui, i), outputs=blocks_uis + [state])
369
 
370
 
371
  inputs = [
@@ -505,8 +465,8 @@ with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
505
 
506
  if num_warc_samples:
507
  yield {
508
- output_tab: gr.Tab(f"Output (~{len(output_docs)/num_warc_samples*100:.03f}% of data)"),
509
- excluded_tab: gr.Tab(f"Excluded (~{100 - len(output_docs)/num_warc_samples*100:.03f}% of data)"),
510
  output_dataframe: pd.DataFrame({"text": [doc.text for doc in output_docs]}),
511
  **{
512
  excluded_dataframes[type(step_to_run)]: pd.DataFrame({"text": [doc.text for doc in step_to_run.exclusion_writer.docs]})
@@ -514,7 +474,7 @@ with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
514
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
515
  },
516
  **{
517
- excluded_tabs[type(step_to_run)]: gr.Tab(f"{type(step_to_run).__name__} (~{len(step_to_run.exclusion_writer.docs)/num_warc_samples*100:.03f}% of data)")
518
  for step_to_run in pipeline_executor.pipeline
519
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
520
  },
@@ -535,8 +495,8 @@ with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
535
  },
536
  }
537
  yield {
538
- output_tab: gr.Tab(f"Output (~{len(output_docs)/num_warc_samples*100:.03f}% of data)"),
539
- excluded_tab: gr.Tab(f"Excluded (~{100 - len(output_docs)/num_warc_samples*100:.03f}% of data)"),
540
  output_dataframe: pd.DataFrame({"text": [doc.text for doc in output_docs]}),
541
  **{
542
  excluded_dataframes[type(step_to_run)]: pd.DataFrame({"text": [doc.text for doc in step_to_run.exclusion_writer.docs]})
@@ -544,7 +504,7 @@ with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
544
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
545
  },
546
  **{
547
- excluded_tabs[type(step_to_run)]: gr.Tab(f"{type(step_to_run).__name__} (~{len(step_to_run.exclusion_writer.docs)/num_warc_samples*100:.03f}% of data)")
548
  for step_to_run in pipeline_executor.pipeline
549
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
550
  },
 
78
 
79
  make_gallery_image_buttons_js = """
80
  function load() {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  let buttons = document.getElementsByClassName("block-button");
 
 
 
82
  Array.from(document.getElementById("pipeline-gallery").getElementsByClassName("thumbnail-item")).map(
83
+ (b, i) => b.addEventListener("click", () => buttons[i].click())
84
  )
85
  }
86
  """
 
104
  min-height: 600px;
105
  max-height: 600px;
106
  }
107
+ .scollabe_tabs .tab-wrapper .tab-container {
108
  overflow: scroll;
109
  }
110
  """
 
121
 
122
 
123
  with gr.Blocks(css=css, js=make_gallery_image_buttons_js) as demo:
124
+ state = gr.State({"selected_block": None})
125
  gr.Markdown("# Common Crawl Pipeline Creator")
126
  with gr.Row():
127
+ with gr.Column(min_width=640):
128
  gallery = gr.Gallery(
129
  blocks,
130
  columns=4,
 
301
  ]
302
 
303
  with gr.Column():
304
+ with gr.Tabs(elem_classes="scollabe_tabs"):
305
+ with gr.Tab("Output (and % of data)") as output_tab:
306
+ output_dataframe = gr.DataFrame(datatype="markdown")
307
+ with gr.Tab("Excluded (and % of data)") as excluded_tab:
308
+ with gr.Tabs(elem_classes="scollabe_tabs"):
309
+ excluded_dataframes: dict[Type, gr.DataFrame] = {}
310
+ excluded_tabs: dict[Type, gr.Tab] = {}
311
+ for step in steps:
312
+ if issubclass(step, BaseFilter) and step is not URLFilter:
313
+ with gr.Tab(step.__name__ + " (and % of data)") as t:
314
+ excluded_dataframes[step] = gr.DataFrame(datatype="markdown")
315
+ excluded_tabs[step] = t
316
+ with gr.Tab("Python code") as code_tab:
317
+ python_code_markdown = gr.Markdown(DEFAULT_CODE)
318
 
319
 
320
  gr.Markdown("_powered by [datatrove](https://github.com/huggingface/datatrove)_")
321
 
322
+ def show_block_ui(i, current_state: dict):
323
+ if i == current_state.get("selected_block"):
324
+ i = None
325
  return {**{block_ui: gr.Column(visible=(j == i)) for j, block_ui in enumerate(blocks_uis)}, state: {"selected_block": i}}
326
 
327
  for i, button in enumerate(gallery_image_buttons):
328
+ button.click(partial(show_block_ui, i), inputs=[state], outputs=blocks_uis + [state])
329
 
330
 
331
  inputs = [
 
465
 
466
  if num_warc_samples:
467
  yield {
468
+ output_tab: gr.Tab(f"Output ({len(output_docs)/num_warc_samples*100:.03f}%)"),
469
+ excluded_tab: gr.Tab(f"Excluded ({100 - len(output_docs)/num_warc_samples*100:.03f}%)"),
470
  output_dataframe: pd.DataFrame({"text": [doc.text for doc in output_docs]}),
471
  **{
472
  excluded_dataframes[type(step_to_run)]: pd.DataFrame({"text": [doc.text for doc in step_to_run.exclusion_writer.docs]})
 
474
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
475
  },
476
  **{
477
+ excluded_tabs[type(step_to_run)]: gr.Tab(f"{type(step_to_run).__name__} ({len(step_to_run.exclusion_writer.docs)/num_warc_samples*100:.03f}%)")
478
  for step_to_run in pipeline_executor.pipeline
479
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
480
  },
 
495
  },
496
  }
497
  yield {
498
+ output_tab: gr.Tab(f"Output ({len(output_docs)/num_warc_samples*100:.03f}%)"),
499
+ excluded_tab: gr.Tab(f"Excluded ({100 - len(output_docs)/num_warc_samples*100:.03f}%)"),
500
  output_dataframe: pd.DataFrame({"text": [doc.text for doc in output_docs]}),
501
  **{
502
  excluded_dataframes[type(step_to_run)]: pd.DataFrame({"text": [doc.text for doc in step_to_run.exclusion_writer.docs]})
 
504
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
505
  },
506
  **{
507
+ excluded_tabs[type(step_to_run)]: gr.Tab(f"{type(step_to_run).__name__} ({len(step_to_run.exclusion_writer.docs)/num_warc_samples*100:.03f}%)")
508
  for step_to_run in pipeline_executor.pipeline
509
  if isinstance(step_to_run, BaseFilter) and type(step_to_run) in excluded_dataframes
510
  },