diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index d7cc87fb59..06e692aa72 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -24,6 +24,7 @@ import type { ImageSegmentationTaskHelper, ImageToImageTaskHelper, ImageToTextTaskHelper, + ImageToVideoTaskHelper, ObjectDetectionTaskHelper, QuestionAnsweringTaskHelper, SentenceSimilarityTaskHelper, @@ -239,6 +240,10 @@ export function getProviderHelper( provider: InferenceProviderOrPolicy, task: "image-to-image" ): ImageToImageTaskHelper & TaskProviderHelper; +export function getProviderHelper( + provider: InferenceProviderOrPolicy, + task: "image-to-video" +): ImageToVideoTaskHelper & TaskProviderHelper; export function getProviderHelper( provider: InferenceProviderOrPolicy, task: "sentence-similarity" @@ -275,7 +280,6 @@ export function getProviderHelper( provider: InferenceProviderOrPolicy, task: InferenceTask | undefined ): TaskProviderHelper; - export function getProviderHelper( provider: InferenceProviderOrPolicy, task: InferenceTask | undefined diff --git a/packages/inference/src/providers/providerHelper.ts b/packages/inference/src/providers/providerHelper.ts index f934ef8fb1..5260b23998 100644 --- a/packages/inference/src/providers/providerHelper.ts +++ b/packages/inference/src/providers/providerHelper.ts @@ -18,6 +18,7 @@ import type { ImageToImageInput, ImageToTextInput, ImageToTextOutput, + ImageToVideoInput, ObjectDetectionInput, ObjectDetectionOutput, QuestionAnsweringInput, @@ -52,6 +53,7 @@ import type { BaseArgs, BodyParams, HeaderParams, InferenceProvider, RequestArgs import { toArray } from "../utils/toArray.js"; import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js"; import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js"; +import type { ImageToVideoArgs } from "../tasks/cv/imageToVideo.js"; /** * Base class for 
task-specific provider helpers @@ -150,6 +152,12 @@ export interface ImageToImageTaskHelper { preparePayloadAsync(args: ImageToImageArgs): Promise; } +export interface ImageToVideoTaskHelper { + getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise; + preparePayload(params: BodyParams): Record; + preparePayloadAsync(args: ImageToVideoArgs): Promise; +} + export interface ImageSegmentationTaskHelper { getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise; preparePayload(params: BodyParams): Record | BodyInit; diff --git a/packages/inference/src/tasks/cv/imageToVideo.ts b/packages/inference/src/tasks/cv/imageToVideo.ts new file mode 100644 index 0000000000..cfe3dd7b4b --- /dev/null +++ b/packages/inference/src/tasks/cv/imageToVideo.ts @@ -0,0 +1,24 @@ +import type { ImageToVideoInput } from "@huggingface/tasks"; +import { resolveProvider } from "../../lib/getInferenceProviderMapping.js"; +import { getProviderHelper } from "../../lib/getProviderHelper.js"; +import type { BaseArgs, Options } from "../../types.js"; +import { innerRequest } from "../../utils/request.js"; +import { makeRequestOptions } from "../../lib/makeRequestOptions.js"; + +export type ImageToVideoArgs = BaseArgs & ImageToVideoInput; + +/** + * This task reads an image input and outputs a video. 
+ * Recommended model: Wan-AI/Wan2.1-I2V-14B-720P + */ +export async function imageToVideo(args: ImageToVideoArgs, options?: Options): Promise { + const provider = await resolveProvider(args.provider, args.model, args.endpointUrl); + const providerHelper = getProviderHelper(provider, "image-to-video"); + const payload = await providerHelper.preparePayloadAsync(args); + const { data: res } = await innerRequest(payload, providerHelper, { + ...options, + task: "image-to-video", + }); + const { url, info } = await makeRequestOptions(args, providerHelper, { ...options, task: "image-to-video" }); + return providerHelper.getResponse(res, url, info.headers as Record); +} diff --git a/packages/inference/src/tasks/index.ts b/packages/inference/src/tasks/index.ts index f32d87f0f6..a39779a227 100644 --- a/packages/inference/src/tasks/index.ts +++ b/packages/inference/src/tasks/index.ts @@ -13,6 +13,7 @@ export * from "./cv/imageClassification.js"; export * from "./cv/imageSegmentation.js"; export * from "./cv/imageToImage.js"; export * from "./cv/imageToText.js"; +export * from "./cv/imageToVideo.js"; export * from "./cv/objectDetection.js"; export * from "./cv/textToImage.js"; export * from "./cv/textToVideo.js";