-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Model] Fix cached buffer allocation and fix dynamic shape in Phi3v
This PR fixes the cached buffer allocation for larger new images and supports dynamic shapes in the vision encoder.
- Loading branch information
Showing
9 changed files
with
434 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/*! | ||
* Copyright (c) 2023-2024 by Contributors | ||
* \file support/vlm_utils.cc | ||
*/ | ||
#include "vlm_utils.h" | ||
|
||
#include <cmath> | ||
|
||
namespace mlc { | ||
namespace llm { | ||
|
||
/*!
 * \brief Compute the height and width an image should be resized to for a given model.
 *
 * Only the "phi3_v" model type is handled; for any other model type the two
 * outputs are left unmodified.
 *
 * \param image_data Image tensor. It is checked to be 4-dimensional with a last
 *        dimension of 3; height is read from shape[1] and width from shape[2].
 * \param model_type Model identifier selecting the resize rule.
 * \param target_height Output: the resized height.
 * \param target_width Output: the resized width (a multiple of 336 for "phi3_v").
 */
void CalculateResizeShape(tvm::runtime::NDArray image_data, std::string model_type,
                          int& target_height, int& target_width) {
  // Validate the rank first: indexing shape[1..3] on a tensor with fewer than
  // four dimensions would read past the end of the shape array.
  ICHECK_EQ(image_data->ndim, 4) << "Image format must be NHWC";
  ICHECK_EQ(image_data->shape[3], 3) << "Image format must be NHWC";
  const int height = static_cast<int>(image_data->shape[1]);
  const int width = static_cast<int>(image_data->shape[2]);

  if ("phi3_v" == model_type) {
    // A zero dimension would make the aspect ratio 0, inf or NaN, and the
    // float-to-int conversion below would then be undefined behaviour.
    ICHECK_GT(height, 0) << "Image height must be positive";
    ICHECK_GT(width, 0) << "Image width must be positive";

    constexpr int hd_num = 4;
    constexpr int kBaseSize = 336;
    const double ratio = static_cast<double>(width) / height;

    // Find the largest scale for which scale * ceil(scale / ratio) <= hd_num.
    // NOTE(review): when height > hd_num * width the condition already fails
    // for scale == 1, so scale ends up 0 and both outputs become 0 -- confirm
    // that callers handle this.
    int scale = 1;
    while (scale * std::ceil(scale / ratio) <= hd_num) {
      scale += 1;
    }
    scale -= 1;

    target_width = scale * kBaseSize;
    // Truncating conversion, preserving the input aspect ratio.
    target_height = static_cast<int>(target_width / ratio);
  }
}
|
||
/*!
 * \brief Compute the image height and width after padding for a given model.
 *
 * Only the "phi3_v" model type is handled; for any other model type the two
 * outputs are left unmodified. For "phi3_v" the resized height (as computed by
 * CalculateResizeShape) is rounded up to the next multiple of 336, and the
 * resized width is passed through unchanged.
 *
 * \param image_data Image tensor; its last dimension (shape[3]) is checked to be 3.
 * \param model_type Model identifier selecting the padding rule.
 * \param pad_height Output: the padded height.
 * \param pad_width Output: the padded width.
 */
void CalculatePadShape(tvm::runtime::NDArray image_data, std::string model_type, int& pad_height,
                       int& pad_width) {
  ICHECK_EQ(image_data->shape[3], 3) << "Image format must be NHWC";
  if ("phi3_v" == model_type) {
    int resized_height = 0;
    int resized_width = 0;
    CalculateResizeShape(image_data, model_type, resized_height, resized_width);
    // Round the height up to a whole number of 336-pixel units. std::ceil is
    // qualified because <cmath> only guarantees the std:: overloads.
    pad_height = static_cast<int>(std::ceil(resized_height / 336.0) * 336);
    pad_width = resized_width;
  }
}
|
||
/*!
 * \brief Compute how many 336-pixel units the padded image spans along each axis.
 *
 * Only the "phi3_v" model type is handled; for any other model type the two
 * outputs are left unmodified.
 *
 * \param image_data Image tensor; its last dimension (shape[3]) is checked to be 3.
 * \param model_type Model identifier selecting the crop rule.
 * \param crop_height Output: the padded height divided by 336 (integer division).
 * \param crop_width Output: the padded width divided by 336 (integer division).
 */
void CalculateCropShape(tvm::runtime::NDArray image_data, std::string model_type, int& crop_height,
                        int& crop_width) {
  ICHECK_EQ(image_data->shape[3], 3) << "Image format must be NHWC";
  if (model_type != "phi3_v") {
    return;
  }

  constexpr int kCropUnit = 336;
  int padded_rows = 0;
  int padded_cols = 0;
  CalculatePadShape(image_data, model_type, padded_rows, padded_cols);
  crop_height = padded_rows / kCropUnit;
  crop_width = padded_cols / kCropUnit;
}
|
||
} // namespace llm | ||
} // namespace mlc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/*! | ||
* Copyright (c) 2023-2024 by Contributors | ||
* \file support/vlm_utils.h | ||
* \brief Helpers for computing image resize, pad, and crop shapes for vision-language models. | ||
*/ | ||
#ifndef MLC_LLM_SUPPORT_VLM_UTILS_H_ | ||
#define MLC_LLM_SUPPORT_VLM_UTILS_H_ | ||
|
||
#include <tvm/runtime/ndarray.h> | ||
|
||
#include <string> | ||
|
||
namespace mlc { | ||
namespace llm { | ||
|
||
/*! | ||
* \brief Calculate the target height and width for resizing an image based on the input data and | ||
* model type. | ||
* \param image_data The input image data as a TVM NDArray. | ||
* \param model_type The type of the model influencing the resizing parameters (e.g., "phi3_v"). | ||
* \param target_height Reference to the variable where the calculated target height will be | ||
* stored. | ||
* \param target_width Reference to the variable where the calculated target width will be | ||
* stored. | ||
*/ | ||
void CalculateResizeShape(tvm::runtime::NDArray image_data, std::string model_type, | ||
int& target_height, int& target_width); | ||
/*! | ||
* \brief Calculate the height and width of an image after padding, based on the input data and | ||
* model type. | ||
* \param image_data The input image data as a TVM NDArray. | ||
* \param model_type The type of the model influencing the padding parameters (e.g., "phi3_v"). | ||
* \param pad_height Reference to the variable where the calculated padded height will be | ||
* stored. | ||
* \param pad_width Reference to the variable where the calculated padded width will be stored. | ||
*/ | ||
void CalculatePadShape(tvm::runtime::NDArray image_data, std::string model_type, int& pad_height, | ||
int& pad_width); | ||
|
||
/*! | ||
* \brief Calculate the crop counts along the height and width of an image based on the input | ||
* data and model type. | ||
* \param image_data The input image data as a TVM NDArray. | ||
* \param model_type The type of the model influencing the cropping parameters (e.g., "phi3_v"). | ||
* \param crop_height Reference to the variable where the calculated crop count along the height | ||
* will be stored. | ||
* \param crop_width Reference to the variable where the calculated crop count along the width | ||
* will be stored. | ||
*/ | ||
void CalculateCropShape(tvm::runtime::NDArray image_data, std::string model_type, int& crop_height, | ||
int& crop_width); | ||
|
||
} // namespace llm | ||
} // namespace mlc | ||
|
||
#endif // MLC_LLM_SUPPORT_VLM_UTILS_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.