/* author: Fotis Lazarinis (actually I translated from C to Java) date: June 1997 address: Psilovraxou 12, Agrinio, 30100 comments: Compile it, import the Porter class into you program and create an instance. Then use the stripAffixes method of this method which takes a String as input and returns the stem of this String again as a String. */ package IRUtilities; import java.io.*; class NewString { public String str; NewString() { str = ""; } } public class Porter { private String Clean( String str ) { int last = str.length(); Character ch = new Character( str.charAt(0) ); String temp = ""; for ( int i=0; i < last; i++ ) { if ( ch.isLetterOrDigit( str.charAt(i) ) ) temp += str.charAt(i); } return temp; } //clean private boolean hasSuffix( String word, String suffix, NewString stem ) { String tmp = ""; if ( word.length() <= suffix.length() ) return false; if (suffix.length() > 1) if ( word.charAt( word.length()-2 ) != suffix.charAt( suffix.length()-2 ) ) return false; stem.str = ""; for ( int i=0; i 0 ) { if ( vowel(stem.charAt(i),stem.charAt(i-1)) ) break; } else { if ( vowel(stem.charAt(i),'a') ) break; } } for ( i++ ; i < length ; i++ ) { if ( i > 0 ) { if ( !vowel(stem.charAt(i),stem.charAt(i-1)) ) break; } else { if ( !vowel(stem.charAt(i),'?') ) break; } } if ( i < length ) { count++; i++; } } //while return(count); } private boolean containsVowel( String word ) { for (int i=0 ; i < word.length(); i++ ) if ( i > 0 ) { if ( vowel(word.charAt(i),word.charAt(i-1)) ) return true; } else { if ( vowel(word.charAt(0),'a') ) return true; } return false; } private boolean cvc( String str ) { int length=str.length(); if ( length < 3 ) return false; if ( (!vowel(str.charAt(length-1),str.charAt(length-2)) ) && (str.charAt(length-1) != 'w') && (str.charAt(length-1) != 'x') && (str.charAt(length-1) != 'y') && (vowel(str.charAt(length-2),str.charAt(length-3))) ) { if (length == 3) { if (!vowel(str.charAt(0),'?')) return true; else return false; } else { if (!vowel(str.charAt(length-3),str.charAt(length-4)) ) return true; else return false; } } return false; } private String step1( String str ) { NewString stem = new NewString(); if ( str.charAt( str.length()-1 ) == 's' ) { if ( (hasSuffix( str, "sses", stem )) || (hasSuffix( str, "ies", stem)) ){ String tmp = ""; for (int i=0; i 0 ) { String tmp = ""; for (int i=0; i 0 ) { str = stem.str + suffixes[index][1]; return str; } } } return str; } private String step3( String str ) { String[][] suffixes = { { "icate", "ic" }, { "ative", "" }, { "alize", "al" }, { "alise", "al" }, { "iciti", "ic" }, { "ical", "ic" }, { "ful", "" }, { "ness", "" }}; NewString stem = new NewString(); for ( int index = 0 ; index 0 ) { str = stem.str + suffixes[index][1]; return str; } } return str; } private String step4( String str ) { String[] suffixes = { "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent", "sion", "tion", "ou", "ism", "ate", "iti", "ous", "ive", "ize", "ise"}; NewString stem = new NewString(); for ( int index = 0 ; index 1 ) { str = stem.str; return str; } } } return str; } private String step5( String str ) { if ( str.charAt(str.length()-1) == 'e' ) { if ( measure(str) > 1 ) {/* measure(str)==measure(stem) if ends in vowel */ String tmp = ""; for ( int i=0; i 1) ) if ( measure(str) > 1 ) {/* measure(str)==measure(stem) if ends in vowel */ String tmp = ""; for ( int i=0; i= 1 ) str = step2( str ); if ( str.length() >= 1 ) str = step3( str ); if ( str.length() >= 1 ) str = step4( str ); if ( str.length() >= 1 ) str = step5( str ); return str; } public String stripAffixes( String str ) { str = str.toLowerCase(); str = Clean(str); if (( str != "" ) && (str.length() > 2)) { str = stripPrefixes(str); if (str != "" ) str = stripSuffixes(str); } return str; } //stripAffixes } //class