Why is the F1 score 0.0 after freeze fine-tuning? #154

Open
kuailexinmouren opened this issue Jan 24, 2025 · 0 comments
After freeze fine-tuning, I obtained two folders: epoch-1-step-180 and epoch-2-step-360. I calculated the F1 score for epoch-2-step-360.
The F1 code is shown below.
```python
# -*- coding: utf-8 -*-
# @project: ChatGLM-Finetuning
# @filename: predict_freeze
# @author: 刘聪NLP
# @zhihu: https://www.zhihu.com/people/LiuCongNLP
# @contact: [email protected]
# @time: 2023/4/5 11:12
"""
File description: evaluate a freeze fine-tuned checkpoint and compute the F1 score on the test set.
"""
import argparse
import json
import os
import sys
import time

import torch
from tqdm import tqdm

sys.path.append('/root/autodl-tmp/chatglm2-6b')
from modeling_chatglm import ChatGLMForConditionalGeneration
from tokenization_chatglm import ChatGLMTokenizer


def set_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--test_path', default='/root/autodl-tmp/ChatGLM-Finetuning/data/spo_0.json', type=str, help='')
    parser.add_argument('--device', default='0', type=str, help='')
    parser.add_argument('--model_dir', default="/root/autodl-tmp/epoch-2-step-360/", type=str, help='')
    parser.add_argument('--max_len', type=int, default=768, help='')
    parser.add_argument('--max_src_len', type=int, default=450, help='')
    parser.add_argument('--prompt_text', type=str,
                        default="你现在是一个信息抽取模型,请你帮我抽取出关系内容为\"性能故障\", \"部件故障\", \"组成\"和 \"检测工具\"的相关三元组,三元组内部用\"_\"连接,三元组之间用\\n分割。文本:",
                        help='')
    parser.add_argument('--top_p', type=float, default=0.7, help='')
    parser.add_argument('--do_sample', type=bool, default=False, help='')
    parser.add_argument('--num_return_sequences', type=int, default=1, help='')
    return parser.parse_args()


def main():
    args = set_args()
    model = ChatGLMForConditionalGeneration.from_pretrained(args.model_dir)
    tokenizer = ChatGLMTokenizer.from_pretrained(args.model_dir)
    model.half().to("cuda:{}".format(args.device))
    model.eval()
    save_data = []
    f1 = 0.0
    max_tgt_len = args.max_len - args.max_src_len - 3
    s_time = time.time()
    with open(args.test_path, "r", encoding="utf-8") as fh:
        for i, line in enumerate(tqdm(fh, desc="iter")):
            with torch.no_grad():
                sample = json.loads(line.strip())
                src_tokens = tokenizer.tokenize(sample["text"])
                prompt_tokens = tokenizer.tokenize(args.prompt_text)

                # Truncate the source text so prompt + source fits into max_src_len.
                if len(src_tokens) > args.max_src_len - len(prompt_tokens):
                    src_tokens = src_tokens[:args.max_src_len - len(prompt_tokens)]

                tokens = prompt_tokens + src_tokens + ["[gMASK]", "<sop>"]
                input_ids = tokenizer.convert_tokens_to_ids(tokens)
                # input_ids = tokenizer.encode("帮我写个快排算法")

                input_ids = torch.tensor([input_ids]).to("cuda:{}".format(args.device))
                generation_kwargs = {
                    "min_length": 5,
                    "max_new_tokens": max_tgt_len,
                    "top_p": args.top_p,
                    "temperature": 0.95,
                    "do_sample": args.do_sample,
                    "num_return_sequences": args.num_return_sequences,
                }

                response = model.generate(input_ids, **generation_kwargs)
                res = []
                for i_r in range(generation_kwargs["num_return_sequences"]):
                    outputs = response.tolist()[i_r][input_ids.shape[1]:]
                    r = tokenizer.decode(outputs).replace("<eop>", "")
                    res.append(r)
                # Keep only well-formed "subject_relation_object" triples from the generated text.
                pre_res = [rr for rr in res[0].split("\n") if len(rr.split("_")) == 3]
                real_res = sample["answer"].split("\n")
                same_res = set(pre_res) & set(real_res)
                if len(set(pre_res)) == 0:
                    p = 0.0
                else:
                    p = len(same_res) / len(set(pre_res))
                r = len(same_res) / len(set(real_res))
                if (p + r) != 0.0:
                    f = 2 * p * r / (p + r)
                else:
                    f = 0.0
                f1 += f
                save_data.append(
                    {"text": sample["text"], "ori_answer": sample["answer"], "gen_answer": res[0], "f1": f})

    e_time = time.time()
    print("总耗时:{}s".format(e_time - s_time))
    print(f1 / 50)  # average per-sample F1, assuming 50 test samples
    save_path = os.path.join(args.model_dir, "ft_pt_answer.json")
    fin = open(save_path, "w", encoding="utf-8")
    json.dump(save_data, fin, ensure_ascii=False, indent=4)
    fin.close()


if __name__ == '__main__':
    main()
```
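For reference, this is how the per-sample precision/recall/F1 above behaves; the triples below are made up for illustration and are not from my data:

```python
# Toy example (made-up triples) showing the per-sample metric used in the script.
pre_res = ["发动机_部件故障_异响", "水温_性能故障_过高"]    # parsed from the model output
real_res = ["发动机_部件故障_异响", "变速箱_部件故障_漏油"]  # gold answer triples

same_res = set(pre_res) & set(real_res)            # exact string matches only
p = len(same_res) / len(set(pre_res))              # precision: 1 / 2 = 0.5
r = len(same_res) / len(set(real_res))             # recall:    1 / 2 = 0.5
f = 2 * p * r / (p + r) if (p + r) != 0.0 else 0.0  # F1 = 0.5
print(p, r, f)
```

So the averaged F1 printed at the end can only be 0.0 if every sample got f = 0.0, i.e. no generated triple ever matched a gold triple string exactly, or no well-formed "a_b_c" line was parsed from the output at all.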
However, the results are shown as follows (the padding warning is repeated for every sample):

```
A decoder-only architecture is being used, but right padding was detected! For correct generation results, please set padding_side='left' when initializing the tokenizer.
iter: 1441it [27:56, 1.16s/it]
总耗时:1676.2034997940063s
0.0
```

Does anyone know possible reasons for this? I would greatly appreciate a reply.
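One more observation: the warning flooding the log asks for padding_side='left' when initializing the tokenizer. I am not sure this is the cause (generation here runs one sample at a time, without padding), but if ChatGLMTokenizer accepts the standard Hugging Face padding_side keyword, the change would look like this (an untested sketch):

```python
# Untested sketch: follow the warning and initialize the tokenizer with left padding.
# Assumes ChatGLMTokenizer forwards standard Hugging Face tokenizer kwargs.
tokenizer = ChatGLMTokenizer.from_pretrained(args.model_dir, padding_side="left")
```

It may also help to inspect the gen_answer fields written to ft_pt_answer.json to see whether the model produces any lines in the expected "subject_relation_object" format at all.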
