Skip to content

Commit a47ff98

Browse files
author
liyang
committed
update test scripts
1 parent 171ca1e commit a47ff98

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+369
-363
lines changed

compare/compare1.py

Lines changed: 150 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -405,36 +405,162 @@ def extract_number_from_format(format_str):
405405
print()
406406

407407
if image_results:
408-
image_results.sort(key=lambda x: extract_number_from_format(x['format']))
409-
print("🖼️ IMAGE EMBEDDING TESTS:")
410-
print("-" * 100)
411-
print(f"{'Test Name':<25} {'Cos Sim':<12} {'L2 Dist':<12} {'Norm Rel Err':<12} {'Status':<15}")
412-
print("-" * 100)
408+
# 按尺寸分类图像测试结果 - 通用方法
409+
import glob
410+
from PIL import Image
413411

414-
image_excellent = image_good = image_poor = image_nan = 0
415-
for result in image_results:
416-
test_name = f"{result['format']}_{result['directory']}"[:24]
417-
cos_sim = result['cos_sim']
418-
l2_dist = result['l2_dist']
419-
norm_rel_err = result['norm_rel_err']
412+
# 自动发现测试图片并建立映射
413+
def build_image_mapping(test_img_dir="/root/tmp/llama.cpp/test_images"):
    """Build a mapping from positional image index to (file name, size string).

    Image files are collected one extension at a time -- every ``*.png``,
    then every ``*.jpg``, then every ``*.jpeg`` -- with each group sorted by
    path, and are numbered in that combined order.  Python's ``glob`` does
    not perform shell-style brace expansion, so a single
    ``*.{png,jpg,jpeg}`` pattern never matches real files; the
    per-extension scan is the only enumeration that works.
    NOTE(review): the numbering presumably has to match the order in which
    the test script processed the images -- confirm before changing it.

    Args:
        test_img_dir: Directory that holds the test images.

    Returns:
        dict mapping ``index -> (filename, size_str)`` where ``size_str`` is
        ``"<width>x<height>"``, or ``"unknown"`` when the size can neither be
        read from the image nor inferred from the file name.  Empty when the
        directory does not exist.
    """
    mapping = {}
    if not os.path.exists(test_img_dir):
        return mapping

    # Escape the directory so glob metacharacters in the path are literal.
    pattern_root = glob.escape(test_img_dir)
    image_files = []
    for ext in ('png', 'jpg', 'jpeg'):
        image_files.extend(sorted(glob.glob(os.path.join(pattern_root, f"*.{ext}"))))

    for index, img_path in enumerate(image_files):
        # Non-file matches are skipped, but they still consume an index.
        if not os.path.isfile(img_path):
            continue

        filename = os.path.basename(img_path)
        try:
            # Read the real pixel dimensions with PIL; ``Image`` is expected
            # to be imported by the enclosing scope.
            with Image.open(img_path) as img:
                width, height = img.size
            size_str = f"{width}x{height}"
        except Exception as e:
            # Deliberately best-effort: any failure to read the image falls
            # back to a "<W>x<H>" token embedded in the file name.
            print(f"Warning: Cannot read image {img_path}: {e}")
            size_match = re.search(r'(\d+)x(\d+)', filename)
            if size_match:
                size_str = f"{size_match.group(1)}x{size_match.group(2)}"
            else:
                size_str = "unknown"

        mapping[index] = (filename, size_str)

    return mapping
448+
449+
# 构建动态图片映射
450+
image_index_mapping = build_image_mapping()
451+
452+
# 为每个结果添加文件名和尺寸信息
453+
for result in image_results:
454+
# 从format中提取索引,如 png_0 -> 0, jpeg_14 -> 14
455+
format_parts = result['format'].split('_')
456+
if len(format_parts) >= 2 and format_parts[1].isdigit():
457+
index = int(format_parts[1])
458+
if index in image_index_mapping:
459+
filename, size_str = image_index_mapping[index]
460+
result['filename'] = filename
461+
result['size_str'] = size_str
462+
# 计算像素数用于排序
463+
if 'x' in size_str and size_str != "unknown":
464+
try:
465+
w, h = map(int, size_str.split('x'))
466+
result['pixel_count'] = w * h
467+
except ValueError:
468+
result['pixel_count'] = 0
469+
else:
470+
result['pixel_count'] = 0
471+
else:
472+
result['filename'] = f"unknown_{index}"
473+
result['size_str'] = "unknown"
474+
result['pixel_count'] = 0
430475
else:
431-
status = "❌ Poor"
432-
image_poor += 1
476+
result['filename'] = result['format']
477+
result['size_str'] = "unknown"
478+
result['pixel_count'] = 0
479+
480+
# 按尺寸分组
481+
size_groups = {}
482+
for result in image_results:
483+
size_str = result.get('size_str', 'unknown')
484+
if size_str not in size_groups:
485+
size_groups[size_str] = []
486+
size_groups[size_str].append(result)
487+
488+
# 按像素数排序尺寸组
489+
sorted_sizes = sorted(size_groups.keys(), key=lambda x:
490+
int(x.split('x')[0]) * int(x.split('x')[1]) if 'x' in x and x != 'unknown' else 0)
491+
492+
print("🖼️ IMAGE EMBEDDING TESTS (按尺寸分类):")
493+
print("-" * 120)
494+
print(f"{'Image File Name':<30} {'Size':<12} {'Format':<8} {'Cos Sim':<12} {'L2 Dist':<12} {'Norm Rel Err':<12} {'Status':<15}")
495+
print("-" * 120)
496+
497+
image_excellent = image_good = image_poor = image_nan = 0
498+
499+
for size_str in sorted_sizes:
500+
# 显示尺寸分组标题
501+
print(f"\n📏 {size_str} 尺寸图片:")
502+
print("-" * 60)
433503

434-
print(f"{test_name:<25} {cos_sim:<12.8f} {l2_dist:<12.6f} {norm_rel_err:<12.4f} {status:<15}")
504+
# 在每个尺寸组内按格式排序 (png, jpg, jpeg)
505+
def get_format_priority(result):
    """Return the sort rank of a result based on its file-name extension.

    Args:
        result: dict that may carry a ``'filename'`` entry; a missing entry
            is treated as an empty name.

    Returns:
        0 for ``.png``, 1 for ``.jpg``, 2 for ``.jpeg`` (case-insensitive),
        and 3 for anything else.
    """
    filename = result.get('filename', '').lower()
    # The tuple position doubles as the priority value.
    for priority, suffix in enumerate(('.png', '.jpg', '.jpeg')):
        if filename.endswith(suffix):
            return priority
    return 3
515+
516+
size_results = sorted(size_groups[size_str], key=get_format_priority)
517+
518+
for result in size_results:
519+
# 自动从文件名提取格式信息
520+
filename = result.get('filename', result['format'])
521+
file_ext = filename.split('.')[-1].lower() if '.' in filename else 'unknown'
522+
523+
# 标准化格式显示
524+
if file_ext == 'png':
525+
img_format = 'PNG'
526+
elif file_ext == 'jpg':
527+
img_format = 'JPG'
528+
elif file_ext == 'jpeg':
529+
img_format = 'JPEG'
530+
else:
531+
img_format = file_ext.upper() if file_ext != 'unknown' else 'Unknown'
532+
533+
test_name = filename[:29] # 使用文件名作为测试名
534+
cos_sim = result['cos_sim']
535+
l2_dist = result['l2_dist']
536+
norm_rel_err = result['norm_rel_err']
537+
538+
if np.isnan(cos_sim):
539+
status = "❌ NaN"
540+
image_nan += 1
541+
elif cos_sim >= 0.99:
542+
status = "✅ Excellent"
543+
image_excellent += 1
544+
elif cos_sim >= 0.95:
545+
status = "🟡 Good"
546+
image_good += 1
547+
else:
548+
status = "❌ Poor"
549+
image_poor += 1
550+
551+
print(f"{test_name:<30} {size_str:<12} {img_format:<8} {cos_sim:<12.8f} {l2_dist:<12.6f} {norm_rel_err:<12.4f} {status:<15}")
435552

436-
print("-" * 100)
553+
print("\n" + "-" * 120)
437554
print(f"Image Tests Summary: ✅ {image_excellent} excellent, 🟡 {image_good} good, ❌ {image_poor} poor, ❌ {image_nan} NaN")
555+
556+
# 尺寸性能分析
557+
print(f"\n📊 尺寸性能分析:")
558+
for size_str in sorted_sizes:
559+
size_results = size_groups[size_str]
560+
size_excellent = sum(1 for r in size_results if not np.isnan(r['cos_sim']) and r['cos_sim'] >= 0.99)
561+
size_total = len(size_results)
562+
accuracy_rate = size_excellent / size_total * 100 if size_total > 0 else 0
563+
print(f" {size_str:<12}: {size_excellent}/{size_total} excellent ({accuracy_rate:.1f}%)")
438564
print()
439565

440566
# 重新计算总体统计

compare/img/gelu/cpp_jpg_embd.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)