17
17
import java .lang .reflect .Type ;
18
18
import java .nio .file .Files ;
19
19
import java .nio .file .Path ;
20
- import java .util .*;
20
+ import java .util .ArrayList ;
21
+ import java .util .Arrays ;
22
+ import java .util .HashMap ;
23
+ import java .util .List ;
24
+ import java .util .Map ;
21
25
import java .util .regex .Pattern ;
22
26
import java .util .stream .Collectors ;
23
27
@@ -26,77 +30,157 @@ public class CategoryGenerator {
26
30
public static final Logger LOGGER = LoggerFactory .getLogger (MOD_ID );
27
31
private static final Gson GSON = new GsonBuilder ().setPrettyPrinting ().create ();
28
32
private static final Path CATEGORIES_PATH = Path .of ("config/CraftSense/categories.json" );
29
- private static final Pattern SPLIT_PATTERN = Pattern .compile ("[_\\ s]" );
33
+ private static final Pattern SPLIT_PATTERN = Pattern .compile ("[_\\ s]+" );
34
+ private static final double POSITION_THRESHOLD = 0.5 ;
35
+ private static final List <String > UNCOUNTABLE = Arrays .asList ("WOOL" , "DIRT" , "SAND" , "WATER" , "MILK" , "LAVA" , "FLESH" , "ICE" );
36
+ private static final double DYNAMIC_BONUS = 0.1 ;
30
37
31
38
public static void generateCategories () {
32
- Map <String , List <String >> categorizedItems = loadExistingCategories ();
33
-
34
- Map <String , List <String >> specificCategories = new HashMap <>();
35
- specificCategories .put ("TOOL" , Arrays .asList ("AXE" , "PICKAXE" , "SWORD" , "SHOVEL" ));
36
- specificCategories .put ("ARMOR" , Arrays .asList ("HELMET" , "CHESTPLATE" , "LEGGINGS" , "BOOTS" ));
37
- specificCategories .put ("SIGN" , Collections .singletonList ("SIGN" ));
38
- specificCategories .put ("STAIR" , Collections .singletonList ("STAIR" ));
39
- specificCategories .put ("DOOR" , Collections .singletonList ("DOOR" ));
40
- specificCategories .put ("FENCE" , Arrays .asList ("FENCE" , "FENCE_GATE" ));
41
- specificCategories .put ("BUTTON" , Collections .singletonList ("BUTTON" ));
42
- specificCategories .put ("PRESSURE_PLATE" , Collections .singletonList ("PRESSURE_PLATE" ));
43
- specificCategories .put ("SLAB" , Collections .singletonList ("SLAB" ));
44
- specificCategories .put ("TRAPDOOR" , Collections .singletonList ("TRAPDOOR" ));
45
- specificCategories .put ("BOAT" , Collections .singletonList ("BOAT" ));
39
+ List <ItemData > items = new ArrayList <>();
46
40
47
41
for (Item item : Registries .ITEM ) {
48
42
Identifier itemId = Registries .ITEM .getId (item );
49
43
String itemName = itemId .getPath ().toUpperCase ();
50
44
51
- if (isItemCategorized (categorizedItems , itemName )) {
52
- continue ;
45
+ if (itemName .equals ("AIR" )) continue ;
46
+
47
+ List <String > tokens ;
48
+
49
+ if (itemName .startsWith ("MUSIC_DISC" ) || itemName .startsWith ("DISC_" )) {
50
+ tokens = new ArrayList <>();
51
+ tokens .add ("MUSIC_DISC" );
52
+ } else {
53
+ tokens = Arrays .stream (SPLIT_PATTERN .split (itemName ))
54
+ .filter (s -> s .length () > 2 )
55
+ .collect (Collectors .toList ());
56
+ }
57
+
58
+ items .add (new ItemData (itemName , tokens ));
59
+ }
60
+
61
+ Map <String , GlobalStat > globalStats = new HashMap <>();
62
+ for (ItemData item : items ) {
63
+ item .generateNGrams ();
64
+
65
+ for (NGramCandidate ng : item .ngrams ) {
66
+ globalStats .computeIfAbsent (ng .phrase , k -> new GlobalStat ()).add (ng .normalizedPosition );
53
67
}
68
+ }
54
69
55
- List <String > keywords = Arrays .stream (SPLIT_PATTERN .split (itemName ))
56
- .filter (word -> word .length () > 2 )
57
- .collect (Collectors .toList ());
70
+ double maxCount = globalStats .values ().stream ().mapToDouble (gs -> gs .count ).max ().orElse (1 );
58
71
59
- boolean isInSpecificCategory = false ;
72
+ Map <String , String > itemCategoryMap = new HashMap <>();
73
+ Map <String , List <String >> existingCategories = loadExistingCategories ();
74
+ List <String > knownCategories = existingCategories .keySet ().stream ().map (String ::toUpperCase ).collect (Collectors .toList ());
60
75
61
- for (Map . Entry < String , List < String >> entry : specificCategories . entrySet () ) {
62
- String category = entry . getKey () ;
63
- List < String > categoryKeywords = entry . getValue () ;
76
+ for (ItemData item : items ) {
77
+ NGramCandidate bestCandidate = null ;
78
+ double bestScore = - 1 ;
64
79
65
- if (keywords .stream ().anyMatch (categoryKeywords ::contains )) {
66
- categorizedItems .computeIfAbsent (category , k -> new ArrayList <>()).add (itemName );
67
- isInSpecificCategory = true ;
80
+ for (NGramCandidate ng : item .ngrams ) {
81
+ GlobalStat stat = globalStats .getOrDefault (ng .phrase , new GlobalStat ());
82
+ double globalAvg = stat .getAverage ();
83
+ double bonus = 0 ;
84
+ String formattedCandidate = formatCategoryName (ng .phrase );
85
+
86
+ if (knownCategories .contains (formattedCandidate .toUpperCase ())) {
87
+ bonus += DYNAMIC_BONUS ;
68
88
}
69
- }
70
89
71
- if (!isInSpecificCategory ) {
72
- for (String keyword : keywords ) {
73
- categorizedItems .computeIfAbsent (keyword , k -> new ArrayList <>()).add (itemName );
90
+ double frequencyFactor = (stat .count / maxCount ) * 0.2 ;
91
+ double candidateScore = globalAvg >= POSITION_THRESHOLD ? ng .normalizedPosition + bonus + frequencyFactor : ng .normalizedPosition * 0.5 ;
92
+
93
+ if (candidateScore > bestScore ) {
94
+ bestScore = candidateScore ;
95
+ bestCandidate = ng ;
74
96
}
75
97
}
98
+
99
+ if (bestCandidate == null && !item .ngrams .isEmpty ()) {
100
+ bestCandidate = item .ngrams .get (item .ngrams .size () - 1 );
101
+ }
102
+
103
+ if (bestCandidate != null ) {
104
+ itemCategoryMap .put (item .name , bestCandidate .phrase );
105
+ }
106
+ }
107
+
108
+ Map <String , List <String >> categorizedItems = loadExistingCategories ();
109
+ for (Map .Entry <String , String > entry : itemCategoryMap .entrySet ()) {
110
+ String itemName = entry .getKey ();
111
+ String label = formatCategoryName (entry .getValue ());
112
+ categorizedItems .computeIfAbsent (label , k -> new ArrayList <>()).add (itemName );
76
113
}
77
114
78
115
saveCategoriesToFile (categorizedItems );
79
116
}
80
117
118
+ private static String formatCategoryName (String candidate ) {
119
+ String [] words = candidate .toLowerCase ().split ("[_\\ s]+" );
120
+ if (words .length == 0 ) return "" ;
121
+
122
+ for (int i = 0 ; i < words .length - 1 ; i ++) {
123
+ words [i ] = capitalize (words [i ]);
124
+ }
125
+
126
+ String last = words [words .length - 1 ];
127
+ if (!UNCOUNTABLE .contains (last .toUpperCase ())) {
128
+ last = pluralize (last );
129
+ }
130
+
131
+ words [words .length - 1 ] = capitalize (last );
132
+ return String .join (" " , words );
133
+ }
134
+
135
+ private static String capitalize (String word ) {
136
+ if (word .isEmpty ()) return word ;
137
+ return word .substring (0 , 1 ).toUpperCase () + word .substring (1 ).toLowerCase ();
138
+ }
139
+
140
+ private static String pluralize (String word ) {
141
+ if (word .endsWith ("s" )) return word ;
142
+ if (word .endsWith ("oo" )) return word + "s" ;
143
+ if (word .endsWith ("ch" ) || word .endsWith ("sh" ) || word .endsWith ("x" ) || word .endsWith ("z" )) return word + "es" ;
144
+ if (word .endsWith ("o" )) {
145
+ char before = word .charAt (word .length () - 2 );
146
+ if (!isVowel (before )) return word + "es" ;
147
+ return word + "s" ;
148
+ }
149
+
150
+ if (word .endsWith ("y" ) && word .length () > 1 && !isVowel (word .charAt (word .length () - 2 ))) return word .substring (0 , word .length () - 1 ) + "ies" ;
151
+ return word + "s" ;
152
+ }
153
+
154
+ private static boolean isVowel (char c ) {
155
+ return "aeiou" .indexOf (Character .toLowerCase (c )) != -1 ;
156
+ }
157
+
81
158
private static Map <String , List <String >> loadExistingCategories () {
82
- if (Files .exists (CATEGORIES_PATH )) {
83
- try (FileReader reader = new FileReader (CATEGORIES_PATH .toFile ())) {
84
- Type type = new TypeToken <Map <String , List <String >>>() {}.getType ();
85
- return GSON .fromJson (reader , type );
86
- } catch (IOException e ) {
87
- LOGGER .error ("Failed to load existing categories" , e );
159
+ Map <String , List <String >> map = new HashMap <>();
160
+
161
+ try {
162
+ Files .createDirectories (CATEGORIES_PATH .getParent ());
163
+ if (Files .exists (CATEGORIES_PATH )) {
164
+ try (FileReader reader = new FileReader (CATEGORIES_PATH .toFile ())) {
165
+ Type type = new TypeToken <Map <String , List <String >>>() {}.getType ();
166
+ map = GSON .fromJson (reader , type );
167
+
168
+ if (map == null ) {
169
+ map = new HashMap <>();
170
+ }
171
+ }
88
172
}
173
+ } catch (IOException e ) {
174
+ LOGGER .error ("Failed to load existing categories" , e );
89
175
}
90
- return new HashMap <>();
91
- }
92
176
93
- private static boolean isItemCategorized (Map <String , List <String >> categorizedItems , String itemName ) {
94
- return categorizedItems .values ().stream ().anyMatch (list -> list .contains (itemName ));
177
+ return map ;
95
178
}
96
179
97
180
private static void saveCategoriesToFile (Map <String , List <String >> categorizedItems ) {
98
181
try {
99
182
Files .createDirectories (CATEGORIES_PATH .getParent ());
183
+
100
184
try (FileWriter writer = new FileWriter (CATEGORIES_PATH .toFile ())) {
101
185
GSON .toJson (categorizedItems , writer );
102
186
LOGGER .info ("Categories successfully saved to {}" , CATEGORIES_PATH );
@@ -105,4 +189,54 @@ private static void saveCategoriesToFile(Map<String, List<String>> categorizedIt
105
189
LOGGER .error ("Failed to save categories to file" , e );
106
190
}
107
191
}
192
+
193
+ private static class ItemData {
194
+ String name ;
195
+ List <String > tokens ;
196
+ List <NGramCandidate > ngrams ;
197
+
198
+ ItemData (String name , List <String > tokens ) {
199
+ this .name = name ;
200
+ this .tokens = tokens ;
201
+ this .ngrams = new ArrayList <>();
202
+ }
203
+
204
+ void generateNGrams () {
205
+ int len = tokens .size ();
206
+ for (int n = 1 ; n <= 3 ; n ++) {
207
+ if (n > len ) break ;
208
+
209
+ for (int i = 0 ; i <= len - n ; i ++) {
210
+ double norm = (i + ((n + 1 ) / 2.0 )) / (double ) len ;
211
+ String phrase = String .join ("_" , tokens .subList (i , i + n ));
212
+
213
+ ngrams .add (new NGramCandidate (phrase , norm ));
214
+ }
215
+ }
216
+ }
217
+ }
218
+
219
+ private static class NGramCandidate {
220
+ String phrase ;
221
+ double normalizedPosition ;
222
+
223
+ NGramCandidate (String phrase , double normalizedPosition ) {
224
+ this .phrase = phrase ;
225
+ this .normalizedPosition = normalizedPosition ;
226
+ }
227
+ }
228
+
229
+ private static class GlobalStat {
230
+ double sum = 0 ;
231
+ int count = 0 ;
232
+
233
+ void add (double value ) {
234
+ sum += value ;
235
+ count ++;
236
+ }
237
+
238
+ double getAverage () {
239
+ return count == 0 ? 0 : sum / count ;
240
+ }
241
+ }
108
242
}
0 commit comments