//WordFreqTest.java
//Reads a file of text and determines the frequency of each word.
//Similar to the case study of chapter 8.
//Usage: java WordFreqTest inputFileName >redirected_output_file
//All-in-one file: also includes class WordFreq (a string and an int counter).
//A TreeMap keyed by the lower-cased word holds the WordFreq objects in sorted order.

import java.io.*;
import java.util.*;

public class WordFreqTest {

    /** Whitespace and punctuation that separate words; none of these characters ends up in a word. */
    private static final String DELIMITERS = " \t\r\n\\\"!@#$%^&*()-_=+[]{}';:|/?.>,<`~";

    /**
     * Counts the words of the file named by the first command-line argument.
     * Prints one "word count" line per distinct word, in String order, followed by
     * a summary line with the total and distinct word counts.
     *
     * @param args args[0] is the path of the text file to analyse; if it is missing,
     *             a usage message is printed and the program exits with status 1
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            // Diagnostics go to stderr: per the usage above, stdout is normally redirected to a file.
            System.err.println("Usage: java WordFreqTest inputFileName");
            System.exit(1);
        }
        String fileName = args[0];

        // TreeMap keeps the words sorted, so iterating its values yields the report order.
        Map<String, WordFreq> wordFreqs = new TreeMap<>();
        int totalWords = 0;

        // try-with-resources closes the reader even when readLine() fails part-way through.
        try (BufferedReader wordFile = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = wordFile.readLine()) != null) {
                totalWords += tallyLine(line, wordFreqs);
            }
        } catch (IOException e) {
            // Report the real cause, then fall through and print whatever was counted so far.
            System.err.println("Could not read " + fileName + ": " + e.getMessage());
        }

        for (WordFreq wordFreq : wordFreqs.values()) {
            System.out.println(wordFreq);
        }
        System.out.println("total words: " + totalWords + " Different words: " + wordFreqs.size());
    }

    /**
     * Splits one line into words, lower-cases them and records each in the map.
     *
     * @param line      one line of input text
     * @param wordFreqs running tally, keyed by lower-cased word; updated in place
     * @return the number of words found on this line
     */
    private static int tallyLine(String line, Map<String, WordFreq> wordFreqs) {
        StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
        int wordsOnLine = 0;
        while (tokens.hasMoreTokens()) {
            String word = tokens.nextToken().toLowerCase();
            WordFreq seen = wordFreqs.get(word);
            if (seen == null) {
                wordFreqs.put(word, new WordFreq(word)); // a new WordFreq starts with a count of 1
            } else {
                seen.another();
            }
            wordsOnLine++;
        }
        return wordsOnLine;
    }
}

//Should be a public class in its own file...
/**
 * A word together with the number of times it has been seen.
 * Ordering, equality and hash code all depend on the word alone, never on the count.
 */
class WordFreq implements Comparable<WordFreq> {
    private final String word;
    private int freq;

    /**
     * Creates an entry for a word that has been seen once.
     *
     * @param newWord the word this entry counts
     */
    public WordFreq(String newWord) {
        word = newWord;
        freq = 1;
    }

    /** @return the word this entry counts */
    public String wordIs() {
        return word;
    }

    /** @return how many times the word has been counted */
    public int freqIs() {
        return freq;
    }

    /** @return the word and its count separated by a single space, e.g. "cat 2" */
    @Override
    public String toString() {
        return word + " " + freq;
    }

    /** Records one more occurrence of the word. */
    public void another() {
        freq++;
    }

    /**
     * Orders entries by their words using String's natural ordering.
     *
     * @param otherWordFreq the entry to compare against
     * @return negative, zero or positive as this word sorts before, equal to or after the other
     */
    @Override
    public int compareTo(WordFreq otherWordFreq) {
        return this.word.compareTo(otherWordFreq.word);
    }

    /** Two entries are equal when their words are equal, which keeps equals consistent with compareTo. */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof WordFreq)) {
            return false;
        }
        return word.equals(((WordFreq) other).word);
    }

    /** Hash of the word only, matching equals. */
    @Override
    public int hashCode() {
        return word.hashCode();
    }
}