Skip to content

Commit

Permalink
address mypy issues
Browse files (browse the repository at this point in the history)
Signed-off-by: Aakanksha Duggal <[email protected]>
  • Loading branch information
aakankshaduggal authored Nov 12, 2024
1 parent cd0b73a commit 3040657
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions src/instructlab/sdg/datamixing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -445,7 +445,9 @@ def __create_auxiliary_ds(rec):


def _create_phase10_ds(
generated_dataset: Dataset, auxiliary_inst: Optional[Dict[str, List[str]]], use_legacy_pretraining_format: bool,
generated_dataset: Dataset,
auxiliary_inst: Optional[Dict[str, List[str]]],
use_legacy_pretraining_format: bool,
):
"""
Create a dataset for Phase 1.0 of downstream training.
Expand All @@ -461,14 +463,14 @@ def _create_phase10_ds(
# Include phase07
pretraining_knowledge_ds = _generate_knowledge_qa_dataset(
generated_dataset, keep_context_separate=False
).map(
lambda rec: _conv_pretrain(rec, use_legacy_pretraining_format)
)
).map(lambda rec: _conv_pretrain(rec, use_legacy_pretraining_format))

auxiliary_dataset = _create_auxiliary_dataset(generated_dataset, auxiliary_inst)

if auxiliary_dataset is not None:
phase10 = concatenate_datasets([raft_knowledge_ds, pretraining_knowledge_ds, auxiliary_dataset])
phase10 = concatenate_datasets(
[raft_knowledge_ds, pretraining_knowledge_ds, auxiliary_dataset]
)
else:
phase10 = concatenate_datasets([raft_knowledge_ds, pretraining_knowledge_ds])
return phase10
Expand Down

0 comments on commit 3040657

Please sign in to comment.