@@ -43,9 +43,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
        txt = self.post_list[idx]
-        encodings_dict = self.tokenizer(
-            txt, truncation=True, max_length=self.max_length, padding="max_length"
-        )
+        encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
        input_ids = torch.tensor(encodings_dict["input_ids"])
        attn_masks = torch.tensor(encodings_dict["attention_mask"])
 
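For context, a minimal sketch of what the collapsed tokenizer call above produces, assuming a Hugging Face tokenizer; the checkpoint name and sample text here are illustrative, not from the PR:

    # Sketch only: assumes `transformers` is installed; "gpt2" is an illustrative checkpoint.
    import torch
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

    enc = tokenizer("some post text", truncation=True, max_length=1024, padding="max_length")
    input_ids = torch.tensor(enc["input_ids"])       # shape: (max_length,)
    attn_mask = torch.tensor(enc["attention_mask"])  # 1 for real tokens, 0 for padding

Because padding="max_length" pads every example to the same length, the tensors built in __getitem__ can be stacked into batches without a custom collate function.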
@@ -75,19 +73,11 @@ def make_text(post, summarize):
             self.post_list.append(sample["info"]["post"])
             # NOTE: The chosen summary is always the first one, i.e. `sample["summaries"][0]`
             if sample["choice"] == 0:
-                self.summaries_0.append(
-                    make_text(sample["info"], sample["summaries"][0]["text"])
-                )
-                self.summaries_1.append(
-                    make_text(sample["info"], sample["summaries"][1]["text"])
-                )
+                self.summaries_0.append(make_text(sample["info"], sample["summaries"][0]["text"]))
+                self.summaries_1.append(make_text(sample["info"], sample["summaries"][1]["text"]))
             else:
-                self.summaries_0.append(
-                    make_text(sample["info"], sample["summaries"][1]["text"])
-                )
-                self.summaries_1.append(
-                    make_text(sample["info"], sample["summaries"][0]["text"])
-                )
+                self.summaries_0.append(make_text(sample["info"], sample["summaries"][1]["text"]))
+                self.summaries_1.append(make_text(sample["info"], sample["summaries"][0]["text"]))
             self.labels.append(0)
 
     def __len__(self):
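This branch preserves the invariant stated in the NOTE: whichever summary sample["choice"] marks as preferred always lands in summaries_0, the rejected one in summaries_1, so a constant label of 0 is correct for every row. A sketch of the same logic expressed with indexing instead of branching; the sample data is invented:

    # Sketch only: `sample` mirrors the structure used above; values are made up.
    sample = {
        "choice": 1,
        "info": {"post": "..."},
        "summaries": [{"text": "summary A"}, {"text": "summary B"}],
    }

    chosen = sample["summaries"][sample["choice"]]["text"]        # the preferred summary
    rejected = sample["summaries"][1 - sample["choice"]]["text"]  # the other one
    # summaries_0 always receives `chosen` and summaries_1 `rejected`,
    # which is why labels.append(0) ("first is better") holds unconditionally.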
@@ -113,7 +103,7 @@ def __init__(self, train_path, tokenizer, split, max_length=1024):
        if split == "valid":
            df = df.sample(n=5000)
        self.summarizes = []
-        for (i, row) in df.iterrows():
+        for i, row in df.iterrows():
            self.summarizes.append(f"Summarize: {row['text']}. TL;DR: {row['summary']}")
        self.tokenizer = tokenizer
        self.max_length = max_length
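A minimal sketch of the prompt string this loop builds, assuming a pandas DataFrame with text and summary columns; the two rows are invented stand-ins for the real data:

    # Sketch only: a two-row stand-in for the real summarization DataFrame.
    import pandas as pd

    df = pd.DataFrame({
        "text": ["post one", "post two"],
        "summary": ["short one", "short two"],
    })

    summarizes = []
    for i, row in df.iterrows():  # tuple unpacking needs no parentheses here
        summarizes.append(f"Summarize: {row['text']}. TL;DR: {row['summary']}")
    # -> ["Summarize: post one. TL;DR: short one",
    #     "Summarize: post two. TL;DR: short two"]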
@@ -125,9 +115,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
        txt = self.summarizes[idx]
-        encodings_dict = self.tokenizer(
-            txt, truncation=True, max_length=self.max_length, padding="max_length"
-        )
+        encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
        input_ids = torch.tensor(encodings_dict["input_ids"])
        attn_masks = torch.tensor(encodings_dict["attention_mask"])
 
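Downstream, items from either dataset batch cleanly because every example is padded to the same max_length. A hedged usage sketch, assuming __getitem__ returns the (input_ids, attn_masks) pair it builds (the return statement is outside this hunk) and with dataset construction elided:

    # Sketch only: `dataset` stands in for an instance of one of the classes above.
    from torch.utils.data import DataLoader

    loader = DataLoader(dataset, batch_size=8, shuffle=True)
    for input_ids, attn_masks in loader:
        # Both tensors have shape (batch_size, max_length) thanks to padding="max_length".
        ...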