Commit e4449d4 · Parent: e612968
Updating for falcon

assets/prompt-order-experiment.svg CHANGED

mermaid.md CHANGED
@@ -14,7 +14,7 @@ graph TD
     style F fill:#333,stroke:#FF9D00,color:#FFD21E

     subgraph Notebooks
-        NB0[00-poe-generate-
+        NB0[00-poe-generate-falcon-reasoning.ipynb]
         NB1[01-poe-dataset-creation.ipynb]
         NB2[02-autotrain.ipynb]
         NB3[03-poe-token-count-exploration.ipynb]
@@ -23,15 +23,15 @@ graph TD

     subgraph Models
         D[Fine-Tuned MODELS]
-        G[BASE_MODEL:
+        G[BASE_MODEL: tiiuae/Falcon3-7B-Instruct]
     end

     subgraph Datasets
         A[(layoric/labeled-multiple-choice-explained)]
-        B[(derek-thomas/labeled-multiple-choice-explained-
-        C[(derek-thomas/labeled-multiple-choice-explained-
+        B[(derek-thomas/labeled-multiple-choice-explained-falcon-reasoning)]
+        C[(derek-thomas/labeled-multiple-choice-explained-falcon-tokenized)]
         E[Deployment Config]
-        F[(derek-thomas/labeled-multiple-choice-explained-
+        F[(derek-thomas/labeled-multiple-choice-explained-falcon-results)]
     end

     A --> NB0
@@ -56,14 +56,14 @@ graph TD
     G --> NB4
     NB4 --> F

-    click NB0 href "https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/00-poe-generate-
+    click NB0 href "https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/00-poe-generate-falcon-reasoning.ipynb"
     click NB1 href "https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/01-poe-dataset-creation.ipynb"
     click NB2 href "https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/02-autotrain.ipynb"
     click NB3 href "https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/03-poe-token-count-exploration.ipynb"
     click NB4 href "https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/04-poe-eval.ipynb"
-    click G href "https://huggingface.co/
+    click G href "https://huggingface.co/tiiuae/Falcon3-7B-Instruct"
     click A href "https://huggingface.co/datasets/layoric/labeled-multiple-choice-explained"
-    click B href "https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-
-    click C href "https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-
-    click F href "https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-
+    click B href "https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-falcon-reasoning"
+    click C href "https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-falcon-tokenized"
+    click F href "https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-falcon-results"
 ```
prompt_order_exeriment/pages/index.py CHANGED
@@ -11,7 +11,7 @@ This experiment aims to explore various scenarios for **prompt fine-tuning** usi
 ## Scenarios
 We will evaluate the following prompt orders:

-### **Scenario 1: Q - AC - R - FA** (
+### **Scenario 1: Q - AC - R - FA** (Falcon and GPT3.5)

 This is the most natural order. The model generates reasoning before the final answer, providing the most information prior to making a selection. This order leverages decoding mechanics effectively.

@@ -35,7 +35,7 @@ This is our assistant message, you can see that we are forcing a JSON (note I ad
 ```
 </details>

-### **Scenario 2: Q - AC - FA - R** (
+### **Scenario 2: Q - AC - FA - R** (Falcon and GPT3.5)

 An awkward order, placing reasoning after the final answer. While it is faster, it assumes the model can "know" reasoning internally before generating it. This approach saves tokens but is a skeptical case worth testing.
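To make the two orders concrete, here is a minimal sketch of how the assistant target differs between Scenario 1 (reasoning before the final answer) and Scenario 2 (final answer before reasoning). The record fields and JSON keys are illustrative assumptions, not the exact schema used in the notebooks:

```python
import json

# Hypothetical example record; the real column names live in the notebooks.
record = {
    "question": "What gas do plants absorb during photosynthesis?",
    "answer_choices": ["A. Oxygen", "B. Carbon dioxide", "C. Nitrogen"],
    "reasoning": "Plants take in CO2 and release O2 during photosynthesis.",
    "final_answer": "B",
}

# Scenario 1 (Q - AC - R - FA): reasoning precedes the final answer,
# so the model conditions on its own explanation before choosing.
rfa_target = json.dumps({"reasoning": record["reasoning"],
                         "final_answer": record["final_answer"]})

# Scenario 2 (Q - AC - FA - R): the final answer comes first and the
# reasoning is generated afterwards, saving tokens at inference time.
far_target = json.dumps({"final_answer": record["final_answer"],
                         "reasoning": record["reasoning"]})

user_prompt = record["question"] + "\n" + "\n".join(record["answer_choices"])
print(user_prompt, rfa_target, far_target, sep="\n\n")
```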
prompt_order_exeriment/pages/overview.py CHANGED
@@ -3,9 +3,9 @@ import reflex as rx
 p2 = '''
 # Steps
 ### Dataset Selection
-We begin with the <a href="https://huggingface.co/datasets/layoric/labeled-multiple-choice-explained" target="_blank">layoric/labeled-multiple-choice-explained</a> dataset, which includes reasoning provided by GPT-3.5-turbo. reasoning explanations serve as a starting point but may differ from
+We begin with the <a href="https://huggingface.co/datasets/layoric/labeled-multiple-choice-explained" target="_blank">layoric/labeled-multiple-choice-explained</a> dataset, which includes reasoning provided by GPT-3.5-turbo. These reasoning explanations serve as a starting point but may differ from Falcon's reasoning style.

-0. <i><a href="https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/00-poe-generate-
+0. <i><a href="https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/00-poe-generate-falcon-reasoning.ipynb" target="_blank">00-poe-generate-falcon-reasoning.ipynb</a></i>: To align with Falcon, we need to create a refined dataset: <a href="https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-falcon-reasoning" target="_blank">derek-thomas/labeled-multiple-choice-explained-falcon-reasoning</a>.
 1. <i><a href="https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/01-poe-dataset-creation.ipynb" target="_blank">01-poe-dataset-creation.ipynb</a></i>: Then we need to create our prompt experiments.
 2. <i><a href="https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/02-autotrain.ipynb" target="_blank">02-autotrain.ipynb</a></i>: We generate autotrain jobs on spaces to train our models.
 3. <i><a href="https://huggingface.co/derek-thomas/prompt-order-experiment/blob/main/03-poe-token-count-exploration.ipynb" target="_blank">03-poe-token-count-exploration.ipynb</a></i>: We do some quick analysis so we can optimize our TGI settings.
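As a rough sketch of what steps 0 and 3 involve, the snippet below loads the refined Falcon-reasoning dataset and measures reasoning lengths with the base model's tokenizer. The `reasoning` column name is a guess; the real analysis lives in 03-poe-token-count-exploration.ipynb:

```python
from datasets import load_dataset
from transformers import AutoTokenizer

# Dataset and base model names come from the diff above.
ds = load_dataset("derek-thomas/labeled-multiple-choice-explained-falcon-reasoning")
tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-7B-Instruct")

# Token-count exploration informs TGI settings such as max input/total
# tokens; "reasoning" is a hypothetical column name.
lengths = [len(tokenizer.encode(row["reasoning"])) for row in ds["train"]]
print(f"max: {max(lengths)}, mean: {sum(lengths) / len(lengths):.1f}")
```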
prompt_order_exeriment/pages/results.py CHANGED
@@ -13,7 +13,7 @@ Make sure you explore what happeened between:
 """

 # Load the HF dataset
-dataset = load_dataset("derek-thomas/labeled-multiple-choice-explained-
+dataset = load_dataset("derek-thomas/labeled-multiple-choice-explained-falcon-results")

 # Convert the dataset to a Pandas DataFrame
 df = dataset['train'].to_pandas()
@@ -22,8 +22,8 @@ df = dataset['train'].to_pandas()
 cols_to_analyze = [
     "predictions_base",
     "predictions_FA",
-    "
-    "
+    "predictions_RFA_falcon",
+    "predictions_FAR_falcon",
     "predictions_RFA_gpt3_5",
     "predictions_FAR_gpt3_5",
 ]
@@ -32,8 +32,8 @@ cols_to_analyze = [
 model_names = {
     "predictions_base": "Base Model",
     "predictions_FA": "Final Answer",
-    "
-    "
+    "predictions_RFA_falcon": "Reasoning (Falcon) -> Final Answer",
+    "predictions_FAR_falcon": "Final Answer -> Reasoning (Falcon)",
     "predictions_RFA_gpt3_5": "Reasoning (GPT-3.5) -> Final Answer",
     "predictions_FAR_gpt3_5": "Final Answer -> Reasoning (GPT-3.5)",
 }
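Given the columns above, the results page presumably scores each prediction column against the dataset's ground-truth label. A minimal sketch under that assumption follows; the `answer_key` column name is hypothetical, not confirmed by the diff:

```python
from datasets import load_dataset
import pandas as pd

# Load the results dataset named in the diff and flatten to a DataFrame.
dataset = load_dataset("derek-thomas/labeled-multiple-choice-explained-falcon-results")
df = dataset["train"].to_pandas()

cols_to_analyze = [
    "predictions_base",
    "predictions_FA",
    "predictions_RFA_falcon",
    "predictions_FAR_falcon",
    "predictions_RFA_gpt3_5",
    "predictions_FAR_gpt3_5",
]

# "answer_key" is a guessed ground-truth column; substitute the real one.
accuracy = {col: (df[col] == df["answer_key"]).mean() for col in cols_to_analyze}
print(pd.Series(accuracy).sort_values(ascending=False))
```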