-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSynonym.java
More file actions
145 lines (134 loc) · 5.46 KB
/
Synonym.java
File metadata and controls
145 lines (134 loc) · 5.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import java.io.*;
import java.util.*;
import java.util.logging.*;
import ca.site.elkb.*;
/**
 * Command-line tool that replaces each word of its input with a candidate
 * taken from a {@code RogetELKB} word list, choosing the candidate for which
 * {@code OneGramFrequencies.getStandardDeviation} returns the largest value.
 *
 * <p>Usage: {@code Synonym [filename... | words of a sentence...]}. In file
 * mode the rewritten text of each file is written to
 * {@code <filename>.syn.txt}; in sentence mode it is printed to standard
 * output.
 *
 * <p>The class keeps static, unsynchronized state (the lazily created
 * knowledge base and a replacement cache), so it is intended for
 * single-threaded use.
 */
public class Synonym {
    private static final Logger sLogger = Logger.getLogger(Synonym.class.getName());

    /** Cache of already computed replacements, keyed by the original word. */
    private static final Map<String,String> bestWords = new HashMap<String,String>();

    /** Lazily created knowledge base; obtain it through {@link #GetRogetELKB()}. */
    private static RogetELKB elkb;

    /**
     * Program entry point.
     *
     * <p>With no arguments a usage line is printed. If the first argument
     * looks like a file name (see {@link #looksLikeFilename(String)}), every
     * argument is treated as a file: it is read, rewritten, saved next to the
     * original with a {@code .syn.txt} suffix, and the
     * {@code getStandardDeviation} values of the original and rewritten text
     * are printed. Otherwise the arguments are treated as the words of a
     * sentence and the rewritten sentence is printed.
     *
     * @param args file names, or the words of a sentence
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            // Nothing to do, so do not pay for loading the knowledge base.
            System.out.println("Usage: Synonym [filename | A sentance]");
            return;
        }
        GetRogetELKB(); // Pre-load before any word is processed.
        if (looksLikeFilename(args[0])) {
            for (String arg : args) {
                System.out.println(arg);
                List<String> originalWords = ToWordList(FileUtil.Read(arg));
                String rewritten = ToString(PickBestWords(originalWords));
                FileUtil.Write(arg + ".syn.txt", rewritten);
                OneGramFrequencies lf = OneGramFrequencies.Get(true);
                System.out.println(arg + ": " + lf.getStandardDeviation(ToString(originalWords))
                        + " -> " + lf.getStandardDeviation(rewritten));
            }
        } else {
            List<String> argList = new ArrayList<String>();
            Collections.addAll(argList, args);
            System.out.println(argList);
            System.out.println("--------------------------------");
            // The shell already split the sentence on whitespace, so the words
            // must be re-joined with spaces; plain concatenation would run
            // them together.
            System.out.println(join(PickBestWords(argList), " "));
        }
    }

    /**
     * Decides whether a command-line argument should be treated as a file name.
     *
     * <p>An argument counts as a file name when it contains a {@code '.'}
     * that is not its last character (e.g. {@code notes.txt}, but not the
     * sentence-ending {@code "done."}), or when it names an existing regular
     * file. A word without any dot is therefore not mistaken for a file.
     */
    private static boolean looksLikeFilename(String arg) {
        int firstDot = arg.indexOf('.');
        // indexOf returns -1 when there is no dot, so the lower bound must be
        // checked explicitly; -1 would otherwise pass the upper-bound test.
        boolean hasInteriorDot = firstDot >= 0 && firstDot < arg.length() - 1;
        return hasInteriorDot || new java.io.File(arg).isFile();
    }

    /** Joins the words in order, placing {@code separator} between neighbours. */
    private static String join(List<String> words, String separator) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String word : words) {
            if (!first) {
                sb.append(separator);
            }
            sb.append(word);
            first = false;
        }
        return sb.toString();
    }

    /**
     * Concatenates the tokens in order with nothing inserted between them.
     * This reverses {@link #ToWordList(List)} for a single line, because that
     * method keeps the delimiter characters as tokens of their own.
     *
     * @param wordList tokens to concatenate
     * @return the concatenation of all tokens
     */
    public static String ToString(List<String> wordList) {
        StringBuilder sb = new StringBuilder();
        for (String word : wordList) {
            sb.append(word);
        }
        return sb.toString();
    }

    /**
     * Splits every line into tokens on the characters space, {@code .},
     * {@code ,}, {@code ;} and {@code '}. Each delimiter character is itself
     * returned as a one-character token. No token is emitted for the boundary
     * between two lines.
     *
     * @param lines lines of text
     * @return all tokens of all lines, in order
     */
    public static List<String> ToWordList(List<String> lines) {
        List<String> tokens = new ArrayList<String>();
        for (String line : lines) {
            StringTokenizer st = new StringTokenizer(line, " .,;'", true);
            while (st.hasMoreTokens()) {
                tokens.add(st.nextToken());
            }
        }
        return tokens;
    }

    /**
     * Maps every token through {@link #GetBest(String)}.
     *
     * @param wordList tokens to replace
     * @return a new list of the same length holding the replacements
     */
    public static List<String> PickBestWords(List<String> wordList) {
        List<String> replacements = new ArrayList<String>(wordList.size());
        for (String word : wordList) {
            replacements.add(GetBest(word));
        }
        sLogger.fine("PickBestWords(" + wordList + ") " + replacements);
        return replacements;
    }

    /**
     * Returns the replacement for one word, or the word itself when no
     * candidate list is available or no candidate is selected. Results are
     * cached, so each distinct word is looked up at most once.
     *
     * @param original the word to replace
     * @return the chosen replacement, or {@code original}
     */
    public static String GetBest(String original) {
        String cached = bestWords.get(original);
        if (cached != null) {
            return cached;
        }
        // PickBest returns null for a null or empty candidate list, so the
        // "no candidates" and "nothing selected" cases share one path.
        String candidate = PickBest(GetWordList(original));
        String result;
        if (candidate == null || original.equals(candidate)) {
            sLogger.fine("GetBest(" + original + ") No replacement.");
            result = original;
        } else {
            sLogger.fine("GetBest(" + original + ") " + candidate);
            result = candidate;
        }
        bestWords.put(original, result);
        return result;
    }

    /**
     * Selects the candidate with the largest
     * {@code OneGramFrequencies.getStandardDeviation} value. Candidates that
     * contain a double quote or a space are skipped, and a candidate is only
     * selected when its value is strictly greater than zero.
     *
     * @param wordList candidate words; may be {@code null}
     * @return the selected word, or {@code null} when the list is
     *         {@code null} or empty or no candidate qualifies
     */
    public static String PickBest(List<String> wordList) {
        if (wordList == null || wordList.isEmpty()) {
            return null;
        }
        float bestStandardDeviation = 0.0f;
        String bestWord = null;
        OneGramFrequencies lf = OneGramFrequencies.Get(true);
        for (String word : wordList) {
            if (word.indexOf('"') >= 0 || word.indexOf(' ') >= 0) {
                continue; // Don't use quoted or multi-word entries.
            }
            float standardDeviation = lf.getStandardDeviation(word);
            if (standardDeviation > bestStandardDeviation) {
                bestStandardDeviation = standardDeviation;
                bestWord = word;
            }
        }
        sLogger.fine("PickBest(" + wordList + ") " + bestWord + " (" + bestStandardDeviation + ")");
        return bestWord;
    }

    /**
     * Looks the word up in the knowledge-base index, follows its first
     * reference to a head and paragraph, and returns the word list of the
     * first {@code SG} in that paragraph.
     *
     * @param word the word to look up
     * @return the candidate words, or {@code null} when the index has no
     *         reference for the word or any step of the lookup throws
     */
    public static List<String> GetWordList(String word) {
        sLogger.fine("GetWordList(" + word + ")");
        try {
            RogetELKB kb = GetRogetELKB();
            ArrayList<String> refList = kb.index.getStrRefList(word);
            sLogger.fine("GetWordList refList=" + refList);
            if (refList.size() == 0) {
                return null;
            }
            Reference ref = new Reference(refList.get(0));
            String sKey = ref.getRefName();
            int iHead = ref.getHeadNum();
            String sPOS = ref.getPos();
            Head elkbHead = kb.text.getHead(iHead);
            Paragraph elkbPara = elkbHead.getPara(sKey, sPOS);
            // NOTE(review): the result of this lookup is never used; the first
            // SG of the paragraph is returned below instead. Possibly the
            // intent was elkbSG.getWordList() - confirm against the ELKB API.
            // The call is kept because it may throw, which callers would
            // observe as a null return.
            SG elkbSG = elkbPara.getSG(word);
            List<SG> sgList = elkbPara.getSGList();
            List<String> wordList = sgList.get(0).getWordList();
            sLogger.fine("GetWordList(" + word + ") " + wordList);
            return wordList;
        } catch (Exception e) {
            // Best effort: any failure means "no candidates". The stack trace
            // is only logged at FINE to keep normal output readable.
            sLogger.info("Unable to GetWordList(" + word + ")");
            sLogger.log(Level.FINE, "Unable to GetWordList(" + word + ")", e);
            return null;
        }
    }

    /**
     * Returns the shared knowledge base, creating it with the constructor
     * argument {@code 1911} on first use.
     */
    private static RogetELKB GetRogetELKB() {
        if (elkb == null) {
            elkb = new RogetELKB(1911);
        }
        return elkb;
    }
}