Skip to content

Commit

Permalink
add loading into HF datasets library
Browse files Browse the repository at this point in the history
Signed-off-by: Michele Dolfi <[email protected]>
  • Loading branch information
dolfim-ibm committed Sep 3, 2024
1 parent 5b2a3e7 commit 46082e9
Show file tree
Hide file tree
Showing 3 changed files with 660 additions and 3 deletions.
12 changes: 12 additions & 0 deletions examples/export_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ def main():
f"The example failed converting {failure_count} on {len(input_doc_paths)}."
)

# The commented-out block below demonstrates how the output Parquet file can be loaded with the Hugging Face (HF) datasets library
# from datasets import Dataset
# from PIL import Image
# multimodal_df = pd.read_parquet(output_filename)

# # Convert the pandas DataFrame to a Hugging Face Dataset and load the raw bytes into a PIL image
# dataset = Dataset.from_pandas(multimodal_df)
# def transforms(examples):
#     examples["image"] = Image.frombytes('RGB', (examples["image.width"], examples["image.height"]), examples["image.bytes"], 'raw')
#     return examples
# dataset = dataset.map(transforms)


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
main()
Loading

0 comments on commit 46082e9

Please sign in to comment.