
Commit 4390d3f

[style] Increase black's line length (#250)
* style(*): increase black --line-length to `120`
* chore!(pre-commit): update isort to `5.12.0` to resolve a discrepancy; for some odd reason, CI's style check differs from the local `run --all-files`
* style(*): satisfy isort
1 parent a92a971 commit 4390d3f

33 files changed: 209 additions (+), 658 deletions (-)
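Note that the diff below only bumps hook revisions in `.pre-commit-config.yaml`; the 120-character limit itself lives in the project's formatter configuration, which is not shown in this excerpt. As a minimal sketch, assuming black and isort are configured through `pyproject.toml` and that isort is kept compatible with black, the relevant entries would look something like:

[tool.black]
# Raise black's default limit of 88 characters to 120
# (hypothetical location; this file is not part of the diff shown here).
line-length = 120

[tool.isort]
# Keep isort's wrapping consistent with black at the same width.
profile = "black"
line_length = 120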

.pre-commit-config.yaml

+3-3
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.4.0
     hooks:
       - id: check-case-conflict
       - id: check-json
@@ -19,12 +19,12 @@ repos:
      - id: requirements-txt-fixer
      - id: trailing-whitespace
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 23.1.0
     hooks:
       - id: black
         files: ^(trlx|examples|tests|setup.py)/
   - repo: https://github.com/pycqa/isort
-    rev: 5.11.5
+    rev: 5.12.0
     hooks:
       - id: isort
         name: isort (python)

examples/architext.py

+1-3
@@ -38,9 +38,7 @@ def reward_fn(samples, **kwargs):
 def main(hparams={}):
     config = TRLConfig.update(default_config, hparams)

-    trlx.train(
-        "architext/gptj-162M", reward_fn=reward_fn, prompts=prompts, config=config
-    )
+    trlx.train("architext/gptj-162M", reward_fn=reward_fn, prompts=prompts, config=config)


 if __name__ == "__main__":

examples/experiments/grounded_program_synthesis/lang.py

+2-3
@@ -21,6 +21,7 @@ def init_random_input(len_range: int = 5, value_gen=5) -> list:

 const_integer = [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]

+
 # Functions in the DSL
 # Each function defines a transformation in the given DSL Grammar.
 def take(input_list: list, n: int) -> list:
@@ -372,9 +373,7 @@ def basic_stats(dataset, tokenizer):
     """
     length_list = []
     for examples in tqdm(dataset):
-        datapoint = tokenizer(
-            examples["input"] + " " + examples["output"] + "<|endoftext|>"
-        )
+        datapoint = tokenizer(examples["input"] + " " + examples["output"] + "<|endoftext|>")
         length_list.append(len(datapoint["input_ids"]))
     return {
         "max": max(length_list),

examples/experiments/grounded_program_synthesis/train_trlx.py

+3-15
@@ -75,20 +75,8 @@ def main(hparams={}):

 if __name__ == "__main__":
     # TEST REWARD FUNTION
-    assert (
-        reward_fn(
-            ["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -4]),1)"]
-        )
-    ) == [1]
-    assert (
-        reward_fn(
-            ["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -a]),1)"]
-        )
-    ) == [-1]
-    assert (
-        reward_fn(
-            ["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -3]),1)"]
-        )
-    ) == [-0.5]
+    assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -4]),1)"])) == [1]
+    assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -a]),1)"])) == [-1]
+    assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -3]),1)"])) == [-0.5]

     main()

examples/randomwalks/randomwalks.py

+4-19
@@ -5,9 +5,7 @@
 import torch


-def generate_rand_int_excluding(
-    rng: np.random.RandomState, max: int, exclude: int
-) -> int:
+def generate_rand_int_excluding(rng: np.random.RandomState, max: int, exclude: int) -> int:
     """Random integer generator, excluding a specific number

     Args:
@@ -35,12 +33,7 @@ def generate_random_walks(  # noqa: max-complexity
     p_edge: float = 0.1,
     seed: int = 1002,
     gpt2_tokenizer: bool = False,
-) -> Tuple[
-    Callable[[List[str]], Dict[str, List[float]]],
-    List[str],
-    List[str],
-    torch.Tensor,
-]:
+) -> Tuple[Callable[[List[str]], Dict[str, List[float]]], List[str], List[str], torch.Tensor,]:
     """Generate random walks

     Args:
@@ -106,7 +99,6 @@ def generate_random_walks(  # noqa: max-complexity

     # Create n_walks samples
     for _ in range(n_walks):
-
         # Create a random starting node (that isn't already at the goal state)
         node: int = generate_rand_int_excluding(rng, n_nodes, goal)

@@ -116,7 +108,6 @@ def generate_random_walks(  # noqa: max-complexity
         # Do a series of steps, until we hit the maximum number of steps or the
         # goal state (whichever comes first)
         for _step in range(max_length - 1):
-
             # From the starting node, get all the nodes we can move to. Pick one
             # of these at random, and add it to the list of visited nodes
             node = rng.choice(np.nonzero(adjacency_matrix[node])[0])
@@ -143,9 +134,7 @@ def generate_random_walks(  # noqa: max-complexity
     for start in set(range(n_nodes)) - {goal}:
         try:
             # Find the shortest path (up to the max_length)
-            shortest_path = nx.shortest_path(directional_graph, start, goal)[
-                :max_length
-            ]
+            shortest_path = nx.shortest_path(directional_graph, start, goal)[:max_length]
             shortest_lengths.append(len(shortest_path))
         except Exception:
             # If there is no path, use the maximum length instead
@@ -186,11 +175,7 @@ def metric_fn(
         for node in range(len(sample)):
             # If an invalid path is taken, set the length to the invalid
             # path score
-            if (
-                sample[node] >= n_nodes
-                or node > 0
-                and not adjacency_matrix[sample[node - 1], sample[node]]
-            ):
+            if sample[node] >= n_nodes or node > 0 and not adjacency_matrix[sample[node - 1], sample[node]]:
                 length = invalid_path_length
                 break

examples/summarize_daily_cnn/t5_summarize_daily_cnn.py

+4-11
@@ -12,8 +12,7 @@
     import evaluate
 except ImportError:
     raise ImportError(
-        "To run this example, please install the `evaluate` and `nltk` packages"
-        "by running `pip install evaluate`"
+        "To run this example, please install the `evaluate` and `nltk` packages" "by running `pip install evaluate`"
     )

 config_path = pathlib.Path(__file__).parent / "configs/ppo_config_cnn_daily.yml"
@@ -26,9 +25,7 @@
 def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]):
     original_summaries = [prompt_label[prompt.strip()] for prompt in prompts]
     scores = [
-        meteor.compute(predictions=[output.strip()], references=[original])[
-            "meteor"
-        ]
+        meteor.compute(predictions=[output.strip()], references=[original])["meteor"]
         for (original, output) in zip(original_summaries, outputs)
     ]
     return scores
@@ -41,9 +38,7 @@ def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]):
 prompts = ["Summarize: " + prompt for prompt in prompts]

 # take 1,000 samples from the validation set as prompts for evaluation
-val_prompts = [
-    "Summarize: " + prompt for prompt in dataset["validation"]["article"][0:1000]
-]
+val_prompts = ["Summarize: " + prompt for prompt in dataset["validation"]["article"][0:1000]]
 val_summaries = dataset["validation"]["highlights"][0:1000]

 # make dictionary of prompts and labels to use for reward function
@@ -63,9 +58,7 @@ def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]):

 for i in tqdm(range(len(val_prompts))):
     key = tokenizer.decode(
-        tokenizer(val_prompts[i], truncation=True, max_length=max_length)[
-            "input_ids"
-        ],
+        tokenizer(val_prompts[i], truncation=True, max_length=max_length)["input_ids"],
         skip_special_tokens=True,
     )  # get prompt like trlx's prompt
     prompt_label[key.strip()] = val_summaries[i]

examples/summarize_rlhf/reward_model/gptj_reward_test.py

+4-12
@@ -16,9 +16,7 @@ def set_seed(seed_val=42):
     torch.cuda.manual_seed_all(seed_val)


-def create_comparison_dataset(
-    path="CarperAI/openai_summarize_comparisons", split="train"
-):
+def create_comparison_dataset(path="CarperAI/openai_summarize_comparisons", split="train"):
     dataset = load_dataset(path, split=split)
     if split == "test":
         dataset = dataset.select(range(5000))
@@ -95,16 +93,12 @@ def __call__(self, data):
 model = GPTRewardModel("CarperAI/openai_summarize_tldr_sft")
 model.load_state_dict(torch.load("rm_checkpoint/pytorch_model.bin"))
 max_length = 550
-val_pairs = create_comparison_dataset(
-    "CarperAI/openai_summarize_comparisons", "test"
-)
+val_pairs = create_comparison_dataset("CarperAI/openai_summarize_comparisons", "test")
 dev_dataset = PairwiseDataset(val_pairs, tokenizer, max_length=max_length)

 from torch.utils.data import DataLoader

-dev_dataloader = DataLoader(
-    dev_dataset, shuffle=False, batch_size=6, collate_fn=DataCollatorReward()
-)
+dev_dataloader = DataLoader(dev_dataset, shuffle=False, batch_size=6, collate_fn=DataCollatorReward())
 model.cuda()
 model.eval()
 model.half()
@@ -116,9 +110,7 @@ def __call__(self, data):
     for x in batch:
         batch[x] = batch[x].cuda()
     outputs = model(**batch)
-    correct += sum(
-        outputs["chosen_end_scores"] > outputs["rejected_end_scores"]
-    )
+    correct += sum(outputs["chosen_end_scores"] > outputs["rejected_end_scores"])
     chosen_list.append(outputs["chosen_end_scores"].cpu())
     reject_list.append(outputs["rejected_end_scores"].cpu())
 print("Total accuracy: ", correct / len(dev_dataset))

examples/summarize_rlhf/reward_model/reward_model.py

+2-8
@@ -9,11 +9,7 @@ def __init__(self, model_path):
         model = AutoModelForCausalLM.from_pretrained(model_path)
         self.config = model.config
         # `gpt-neo(x)` models use `hidden_size` attribute names instead of `n_embd``
-        self.config.n_embd = (
-            self.config.hidden_size
-            if hasattr(self.config, "hidden_size")
-            else self.config.n_embd
-        )
+        self.config.n_embd = self.config.hidden_size if hasattr(self.config, "hidden_size") else self.config.n_embd
         self.transformer = model.transformer
         self.v_head = nn.Linear(self.config.n_embd, 1, bias=False)
         self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
@@ -91,9 +87,7 @@ def forward(
             rejected_end_scores.append(r_truncated_reward[-1])

             # Compute loss
-            loss += -torch.log(
-                torch.sigmoid(c_truncated_reward - r_truncated_reward)
-            ).mean()
+            loss += -torch.log(torch.sigmoid(c_truncated_reward - r_truncated_reward)).mean()
         loss = loss / bs

         if not inference:

examples/summarize_rlhf/reward_model/train_reward_model_gptj.py

+1-3
@@ -8,9 +8,7 @@
 from transformers import AutoTokenizer, Trainer, TrainingArguments


-def create_comparison_dataset(
-    path="CarperAI/openai_summarize_comparisons", split="train"
-):
+def create_comparison_dataset(path="CarperAI/openai_summarize_comparisons", split="train"):
     dataset = load_dataset(path, split=split)
     pairs = []
     for sample in tqdm(dataset):

examples/summarize_rlhf/sft/summarize_dataset.py

+7-19
@@ -43,9 +43,7 @@ def __len__(self):

     def __getitem__(self, idx):
         txt = self.post_list[idx]
-        encodings_dict = self.tokenizer(
-            txt, truncation=True, max_length=self.max_length, padding="max_length"
-        )
+        encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
         input_ids = torch.tensor(encodings_dict["input_ids"])
         attn_masks = torch.tensor(encodings_dict["attention_mask"])

@@ -75,19 +73,11 @@ def make_text(post, summarize):
             self.post_list.append(sample["info"]["post"])
             # NOTE: The chosen summary is always the first one, i.e. `sample["summaries"][0]`
             if sample["choice"] == 0:
-                self.summaries_0.append(
-                    make_text(sample["info"], sample["summaries"][0]["text"])
-                )
-                self.summaries_1.append(
-                    make_text(sample["info"], sample["summaries"][1]["text"])
-                )
+                self.summaries_0.append(make_text(sample["info"], sample["summaries"][0]["text"]))
+                self.summaries_1.append(make_text(sample["info"], sample["summaries"][1]["text"]))
             else:
-                self.summaries_0.append(
-                    make_text(sample["info"], sample["summaries"][1]["text"])
-                )
-                self.summaries_1.append(
-                    make_text(sample["info"], sample["summaries"][0]["text"])
-                )
+                self.summaries_0.append(make_text(sample["info"], sample["summaries"][1]["text"]))
+                self.summaries_1.append(make_text(sample["info"], sample["summaries"][0]["text"]))
             self.labels.append(0)

     def __len__(self):
@@ -113,7 +103,7 @@ def __init__(self, train_path, tokenizer, split, max_length=1024):
         if split == "valid":
             df = df.sample(n=5000)
         self.summarizes = []
-        for (i, row) in df.iterrows():
+        for i, row in df.iterrows():
             self.summarizes.append(f"Summarize: {row['text']}. TL;DR: {row['summary']}")
         self.tokenizer = tokenizer
         self.max_length = max_length
@@ -125,9 +115,7 @@ def __len__(self):

     def __getitem__(self, idx):
         txt = self.summarizes[idx]
-        encodings_dict = self.tokenizer(
-            txt, truncation=True, max_length=self.max_length, padding="max_length"
-        )
+        encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
         input_ids = torch.tensor(encodings_dict["input_ids"])
         attn_masks = torch.tensor(encodings_dict["attention_mask"])

examples/summarize_rlhf/trlx_gptj_text_summarization.py

+6-18
@@ -22,7 +22,6 @@


 if __name__ == "__main__":
-
     # Load the pre-trained reward model
     rw_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
     rw_tokenizer.pad_token = rw_tokenizer.eos_token
@@ -38,9 +37,7 @@ def get_scores(samples: List[str]):
         batch_size = 2
         for i in range(0, len(samples), batch_size):
             sub_samples = samples[i : i + batch_size]
-            sub_samples = [
-                "<|startoftext|>" + chosen + "<|endoftext|>" for chosen in sub_samples
-            ]
+            sub_samples = ["<|startoftext|>" + chosen + "<|endoftext|>" for chosen in sub_samples]
             encodings_dict = rw_tokenizer(
                 sub_samples,
                 truncation=True,
@@ -69,8 +66,7 @@ def get_prompt_dataset(prompts, max_length):
                 tokenizer(
                     prompts[i].split("TL;DR:")[0],
                     truncation=True,
-                    max_length=max_length
-                    - 5,  # to make sure "TL;DR" dont get truncated
+                    max_length=max_length - 5,  # to make sure "TL;DR" dont get truncated
                 )["input_ids"],
                 skip_special_tokens=True,
             ).strip()
@@ -84,25 +80,19 @@ def get_prompt_dataset(prompts, max_length):

     def reward_fn(samples: List[str], **kwargs):
         original_samples = [text.split("TL;DR:")[0] + "TL;DR: " for text in samples]
-        original_samples = [
-            text + post_summary_dict[text.strip()] for text in original_samples
-        ]
+        original_samples = [text + post_summary_dict[text.strip()] for text in original_samples]
         original_scores = get_scores(original_samples)
         scores = get_scores(samples)
         norms_scores = scores - original_scores
         return norms_scores

-    config_path = pathlib.Path(__file__).parent.joinpath(
-        "configs/ppo_config_summ_gptj.yml"
-    )
+    config_path = pathlib.Path(__file__).parent.joinpath("configs/ppo_config_summ_gptj.yml")
     config = TRLConfig.load_yaml(config_path)

     tokenizer = AutoTokenizer.from_pretrained(config.tokenizer.tokenizer_path)
     tokenizer.pad_token = tokenizer.eos_token
     tokenizer.padding_side = "left"
-    max_length_input = (
-        config.train.seq_length - config.method.gen_kwargs["max_new_tokens"]
-    )
+    max_length_input = config.train.seq_length - config.method.gen_kwargs["max_new_tokens"]

     dataset = load_dataset("CarperAI/openai_summarize_tldr")

@@ -127,8 +117,6 @@ def reward_fn(samples: List[str], **kwargs):
         config.model.model_path,
         reward_fn=reward_fn,
         prompts=train_prompts,
-        eval_prompts=val_prompts[
-            0:1000
-        ],  # sampling 1000 validation prompts for evaluation speed in training
+        eval_prompts=val_prompts[0:1000],  # sampling 1000 validation prompts for evaluation speed in training
         config=config,
     )
