Skip to content

Commit c648001

Browse files
authored
feat: add detailed tensor loading time stat (leejet#793)
1 parent c587a43 commit c648001

File tree

2 files changed

+50
-6
lines changed

2 files changed

+50
-6
lines changed

model.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
19661966
}
19671967

19681968
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
1969+
int64_t process_time_ms = 0;
1970+
int64_t read_time_ms = 0;
1971+
int64_t memcpy_time_ms = 0;
1972+
int64_t copy_to_backend_time_ms = 0;
1973+
int64_t convert_time_ms = 0;
1974+
1975+
int64_t prev_time_ms = 0;
1976+
int64_t curr_time_ms = 0;
1977+
int64_t start_time = ggml_time_ms();
1978+
prev_time_ms = start_time;
19691979
std::vector<TensorStorage> processed_tensor_storages;
19701980
for (auto& tensor_storage : tensor_storages) {
19711981
// LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
19781988
}
19791989
std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
19801990
processed_tensor_storages = dedup;
1991+
curr_time_ms = ggml_time_ms();
1992+
process_time_ms = curr_time_ms - prev_time_ms;
1993+
prev_time_ms = curr_time_ms;
19811994

19821995
bool success = true;
19831996
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20192032
size_t entry_size = zip_entry_size(zip);
20202033
if (entry_size != n) {
20212034
read_buffer.resize(entry_size);
2035+
prev_time_ms = ggml_time_ms();
20222036
zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
2037+
curr_time_ms = ggml_time_ms();
2038+
read_time_ms += curr_time_ms - prev_time_ms;
2039+
prev_time_ms = curr_time_ms;
20232040
memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
2041+
curr_time_ms = ggml_time_ms();
2042+
memcpy_time_ms += curr_time_ms - prev_time_ms;
20242043
} else {
2044+
prev_time_ms = ggml_time_ms();
20252045
zip_entry_noallocread(zip, (void*)buf, n);
2046+
curr_time_ms = ggml_time_ms();
2047+
read_time_ms += curr_time_ms - prev_time_ms;
20262048
}
20272049
zip_entry_close(zip);
20282050
} else {
2051+
prev_time_ms = ggml_time_ms();
20292052
file.seekg(tensor_storage.offset);
20302053
file.read(buf, n);
2054+
curr_time_ms = ggml_time_ms();
2055+
read_time_ms += curr_time_ms - prev_time_ms;
20312056
if (!file) {
20322057
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
20332058
return false;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20722097
read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
20732098
}
20742099

2100+
prev_time_ms = ggml_time_ms();
20752101
if (tensor_storage.is_bf16) {
20762102
// inplace op
20772103
bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20862112
} else if (tensor_storage.is_i64) {
20872113
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
20882114
}
2115+
curr_time_ms = ggml_time_ms();
2116+
convert_time_ms += curr_time_ms - prev_time_ms;
20892117
} else {
20902118
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
20912119
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
20922120

2121+
prev_time_ms = ggml_time_ms();
20932122
if (tensor_storage.is_bf16) {
20942123
// inplace op
20952124
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21092138

21102139
convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
21112140
dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
2141+
curr_time_ms = ggml_time_ms();
2142+
convert_time_ms += curr_time_ms - prev_time_ms;
21122143
}
21132144
} else {
21142145
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
21152146
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
21162147

2148+
prev_time_ms = ggml_time_ms();
21172149
if (tensor_storage.is_bf16) {
21182150
// inplace op
21192151
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21332165

21342166
if (tensor_storage.type == dst_tensor->type) {
21352167
// copy to device memory
2168+
curr_time_ms = ggml_time_ms();
2169+
convert_time_ms += curr_time_ms - prev_time_ms;
2170+
prev_time_ms = curr_time_ms;
21362171
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
2172+
curr_time_ms = ggml_time_ms();
2173+
copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
21372174
} else {
21382175
// convert first, then copy to device memory
21392176
convert_buffer.resize(ggml_nbytes(dst_tensor));
21402177
convert_tensor((void*)read_buffer.data(), tensor_storage.type,
21412178
(void*)convert_buffer.data(), dst_tensor->type,
21422179
(int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
2180+
curr_time_ms = ggml_time_ms();
2181+
convert_time_ms += curr_time_ms - prev_time_ms;
2182+
prev_time_ms = curr_time_ms;
21432183
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
2184+
curr_time_ms = ggml_time_ms();
2185+
copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
21442186
}
21452187
}
21462188
++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21702212
break;
21712213
}
21722214
}
2215+
int64_t end_time = ggml_time_ms();
2216+
LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
2217+
(end_time - start_time) / 1000.f,
2218+
process_time_ms / 1000.f,
2219+
read_time_ms / 1000.f,
2220+
memcpy_time_ms / 1000.f,
2221+
convert_time_ms / 1000.f,
2222+
copy_to_backend_time_ms / 1000.f);
21732223
return success;
21742224
}
21752225

stable-diffusion.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -557,8 +557,6 @@ class StableDiffusionGGML {
557557
// load weights
558558
LOG_DEBUG("loading weights");
559559

560-
int64_t t0 = ggml_time_ms();
561-
562560
std::set<std::string> ignore_tensors;
563561
tensors["alphas_cumprod"] = alphas_cumprod_tensor;
564562
if (use_tiny_autoencoder) {
@@ -656,11 +654,7 @@ class StableDiffusionGGML {
656654
ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
657655
}
658656

659-
int64_t t1 = ggml_time_ms();
660-
LOG_INFO("loading model from '%s' completed, taking %.2fs", SAFE_STR(sd_ctx_params->model_path), (t1 - t0) * 1.0f / 1000);
661-
662657
// check is_using_v_parameterization_for_sd2
663-
664658
if (sd_version_is_sd2(version)) {
665659
if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
666660
is_using_v_parameterization = true;

0 commit comments

Comments
 (0)