Commit 74244d8 ("update"), 1 parent: 5546ca4
1 file changed: _bibliography/papers.bib (6 additions, 6 deletions)
@@ -4,7 +4,7 @@
 @inproceedings{he2024videoscore,
 title = "VideoScore: Building Automatic Metrics to Simulate Fine-grained Human Feedback for Video Generation",
 author = {Xuan He and Dongfu Jiang and Ge Zhang and Max Ku and Achint Soni and Sherman Siu and Haonan Chen and Abhranil Chandra and Ziyan Jiang and Aaran Arulraj and Kai Wang and Quy Duc Do and Yuansheng Ni and Bohan Lyu and Yaswanth Narsupalli and Rongqi Fan and Zhiheng Lyu and Bill Yuchen Lin and Wenhu Chen},
-booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
+booktitle = "Proceedings of EMNLP",
 month = nov,
 year = "2024",
 arxiv = "2406.15252",
@@ -22,6 +22,7 @@ @inproceedings{he2024videoscore
 abbr={EMNLP 2024},
 bibtex_show={true},
 }
+
 @inproceedings{Lu2024WildVisionEV,
 title={WildVision: Evaluating Vision-Language Models in the Wild with Human Preferences},
 author={Yujie Lu and Dongfu Jiang and Wenhu Chen and William Yang Wang and Yejin Choi and Bill Yuchen Lin},
@@ -43,24 +44,23 @@ @inproceedings{Lu2024WildVisionEV
 bibtex_show={true},
 }
 
-
 @inproceedings{Jiang2024GenAIAA,
 title={GenAI Arena: An Open Evaluation Platform for Generative Models},
-author={Dongfu Jiang and Max W.F. Ku and Tianle Li and Yuansheng Ni and Shizhuo Sun and Rongqi Fan and Wenhu Chen},
+author={Dongfu Jiang and Max Ku and Tianle Li and Yuansheng Ni and Shizhuo Sun and Rongqi Fan and Wenhu Chen},
 booltitle = "Proceedings of NeurIPS 2024 Datasets and Benchmarks Track",
 address = "Vancouver, Canada",
 month={Dec},
 year={2024},
 url={https://openreview.net/forum?id=0Gmi8TkUC7#discussion},
 abstract = {Generative AI has made remarkable strides to revolutionize fields such as image and video generation. These advancements are driven by innovative algorithms, architecture, and data. However, the rapid proliferation of generative models has highlighted a critical gap: the absence of trustworthy evaluation metrics. Current automatic assessments such as FID, CLIP, FVD, etc often fail to capture the nuanced quality and user satisfaction associated with generative outputs. This paper proposes an open platform \arena to evaluate different image and video generative models, where users can actively participate in evaluating these models. By leveraging collective user feedback and votes, \arena aims to provide a more democratic and accurate measure of model performance. It covers three arenas for text-to-image generation, text-to-video generation, and image editing respectively. Currently, we cover a total of 27 open-source generative models. \arena has been operating for four months, amassing over 6000 votes from the community. We describe our platform, analyze the data, and explain the statistical methods for ranking the models. To further promote the research in building model-based evaluation metrics, we release a cleaned version of our preference data for the three tasks, namely GenAI-Bench. We prompt the existing multi-modal models like Gemini, GPT-4o to mimic human voting. We compute the correlation between model voting with human voting to understand their judging abilities. Our results show existing multimodal models are still lagging in assessing the generated visual content, even the best model GPT-4o only achieves a Pearson correlation of 0.22 in quality subscore, and behave like random guessing in others.},
-github = "TIGER-AI-Lab/GenAI-Arena",
 abbr={NeurIPS 2024},
+github={TIGER-AI-Lab/GenAI-Arena},
 preview={genai-arena.png},
 arxiv={2406.04485},
 huggingface="https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena",
 selected = true,
+num_co_first_author = {3},
 bibtex_show={true},
-num_co_first_author = 3,
 }
 
 @article{jiang2024mantis,
@@ -87,7 +87,7 @@ @article{jiang2024mantis
 
 @inproceedings{Ku2023VIEScoreTE,
 title={VIEScore: Towards Explainable Metrics for Conditional Image Synthesis Evaluation},
-author={Max W.F. Ku and Dongfu Jiang and Cong Wei and Xiang Yue and Wenhu Chen},
+author={Max Ku and Dongfu Jiang and Cong Wei and Xiang Yue and Wenhu Chen},
 booktitle = "Proceedings of ACL",
 publisher = "Association for Computational Linguistics",
 month = aug,
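
The entries above pair standard BibTeX fields with what appear to be al-folio-style Jekyll site fields: abbr (short venue badge), bibtex_show (expose the raw BibTeX on the page), preview (thumbnail image), arxiv, github, huggingface (external links), selected (feature on the home page), and num_co_first_author. The al-folio attribution is an assumption inferred from the field names, not stated in the commit. A minimal sketch of an entry in this style, with a hypothetical citation key and placeholder values, might look like:

@inproceedings{doe2024example,
  title = {An Example Paper},
  author = {Jane Doe and John Roe},
  booktitle = "Proceedings of EMNLP",
  year = {2024},
  abbr = {EMNLP 2024},
  arxiv = {0000.00000},
  github = {example-org/example-repo},
  preview = {example.png},
  selected = true,
  bibtex_show = {true},
  num_co_first_author = {2},
}

Every value in this sketch is made up for illustration; only the field names come from the diff, and the braced forms github={...} and num_co_first_author = {3} follow the style this commit converges on.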
