/**********************************************************
  File:          porter.java                    
  Description:   Functions for applying the Porter 
                 transformation to a word.
                                                         
  Author:        Dana Vrajitoru  and Stuart J. Barr                      
  Organization:  IUSB                                    
  Updated:       February 2021
***********************************************************/

package hashTable;

/**
 * Porter-style stemmer: lower-cases a word, drops every character that is
 * not an ASCII letter or digit, optionally strips one leading metric-style
 * prefix, and then applies the five suffix-stripping steps of Porter's
 * algorithm.
 *
 * All methods are static and keep no state between calls. The step tables
 * include a few British spellings ("iser", "isation", "alise", "ise") in
 * addition to Porter's original rules.
 */
public class Porter {

	/** Nominal keyword length; kept for other code in the package. */
	static final int KEYWORDSIZE = 25;
	/** When true, {@link #stripAffixes} also removes a leading prefix. */
	static final boolean PREFIXES = true;
	/** Suggested capacity for stem buffers passed to {@link #hasSuffix}. */
	static final int BIG_KEYWORDSIZE = KEYWORDSIZE + 20;
	/** Prefixes removed by {@link #stripPrefixes}, tried in this order. */
	static String [] prefixes = { "kilo", "micro", "milli", "intra", "ultra",
	                              "mega", "nano", "pico", "pseudo"};

	/** Words shorter than this are returned unchanged by the suffix steps. */
	private static final int MIN_STEMMABLE_LENGTH = 3;

	/** Step 2 rules: { suffix, replacement }, applied when measure(stem) > 0. */
	private static final String[][] STEP2_RULES = {
		{ "ational", "ate" },
		{ "tional",  "tion" },
		{ "enci",    "ence" },
		{ "anci",    "ance" },
		{ "izer",    "ize" },
		{ "iser",    "ize" },
		{ "abli",    "able" },
		{ "alli",    "al" },
		{ "entli",   "ent" },
		{ "eli",     "e" },
		{ "ousli",   "ous" },
		{ "ization", "ize" },
		{ "isation", "ize" },
		{ "ation",   "ate" },
		{ "ator",    "ate" },
		{ "alism",   "al" },
		{ "iveness", "ive" },
		{ "fulness", "ful" },
		{ "ousness", "ous" },
		{ "aliti",   "al" },
		{ "iviti",   "ive" },
		{ "biliti",  "ble" } };

	/** Step 3 rules: { suffix, replacement }, applied when measure(stem) > 0. */
	private static final String[][] STEP3_RULES = {
		{ "icate", "ic" },
		{ "ative", "" },
		{ "alize", "al" },
		{ "alise", "al" },
		{ "iciti", "ic" },
		{ "ical",  "ic" },
		{ "ful",   "" },
		{ "ness",  "" } };

	/**
	 * Step 4 suffixes, removed when measure(stem) > 1. "ion" is special:
	 * it is only removed when the remaining stem ends in 's' or 't'.
	 */
	private static final String[] STEP4_SUFFIXES = {
		"al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement",
		"ment", "ent", "ion", "ou", "ism", "ate", "iti", "ous", "ive",
		"ize", "ise" };

	/**
	 * Extracts the stem of a word.
	 *
	 * @param text the raw word; must not be null
	 * @return the stem; may be empty when {@code text} contains no ASCII
	 *         letter or digit
	 */
	public static String stripAffixes (String text) {
		// Locale.ROOT keeps the mapping locale-independent (e.g. a Turkish
		// default locale would otherwise lower-case 'I' to a dotless i,
		// which clean() would then discard).
		text = text.toLowerCase(java.util.Locale.ROOT);
		text = clean(text);
		if ( PREFIXES )
			text = stripPrefixes(text);
		return stripSuffixes(text);
	} // stripAffixes()

	/**
	 * Removes everything that is not an ASCII letter or digit.
	 *
	 * @param kwd the text to filter
	 * @return {@code kwd} with all other characters dropped
	 */
	public static String clean (String kwd) {
		StringBuilder result = new StringBuilder(kwd.length());
		for ( int i = 0 ; i < kwd.length() ; i++ ) {
			char ch = kwd.charAt(i);
			if ( isvalid(ch) )
				result.append(ch);
		}
		return result.toString();
	} // clean

	/**
	 * Returns true if the character is an ASCII letter or digit, false
	 * otherwise.
	 */
	public static boolean isvalid(char l) {
		return (l >= 'a' && l <= 'z')
			|| (l >= 'A' && l <= 'Z')
			|| (l >= '0' && l <= '9');
	} // isvalid()

	/**
	 * Removes one composing prefix such as "kilo" from the start of the
	 * word. Only a leading occurrence is removed, and only when something
	 * is left afterwards, so "kilometer" becomes "meter" while "omega" and
	 * "kilo" are returned unchanged.
	 *
	 * @param text the (already lower-cased) word
	 * @return the word without its leading prefix, or the word itself
	 */
	public static String stripPrefixes (String text ) {
		for ( String prefix : prefixes ) {
			if ( text.length() > prefix.length() && text.startsWith(prefix) )
				return text.substring(prefix.length());
		}
		return text;
	} // stripPrefixes()

	/**
	 * Removes composing pieces at the end of the text, such as "ly", by
	 * running steps 1 to 5 in order. Words shorter than three characters
	 * are returned unchanged.
	 *
	 * @param text the (already lower-cased) word
	 * @return the word after all suffix-stripping steps
	 */
	public static String stripSuffixes ( String text ) {
		if ( text.length() < MIN_STEMMABLE_LENGTH )
			return text;
		text = step1 ( text );
		text = step2 ( text );
		text = step3 ( text );
		text = step4 ( text );
		return step5 ( text );
	} // stripSuffixes()

	/**
	 * Step 1: plurals (1a), "eed"/"ed"/"ing" (1b) and a final "y" that
	 * follows a stem containing a vowel (1c, y becomes i).
	 *
	 * @param text the word; returned unchanged when shorter than three
	 *             characters
	 * @return the word after step 1
	 */
	public static String step1 ( String text ) {
		if ( text.length() < MIN_STEMMABLE_LENGTH )
			return text;

		// 1a: sses -> ss, ies -> i, ss -> ss, s -> ""
		if ( text.endsWith("sses") || text.endsWith("ies") )
			text = removeEnd(text, 2);
		else if ( text.endsWith("s") && !text.endsWith("ss") )
			text = removeEnd(text, 1);

		// 1b: eed -> ee when the stem has a VC sequence; otherwise drop
		// ed / ing when the stem contains a vowel and repair the stem.
		if ( text.endsWith("eed") ) {
			if ( measure(removeEnd(text, 3)) > 0 )
				text = removeEnd(text, 1);
		}
		else {
			String stem = null;
			if ( text.endsWith("ed") )
				stem = removeEnd(text, 2);
			else if ( text.endsWith("ing") )
				stem = removeEnd(text, 3);
			if ( stem != null && containsVowel(stem) )
				text = repairStep1bStem(stem);
		}

		// 1c: y -> i when the rest of the word contains a vowel
		if ( text.endsWith("y") && containsVowel(removeEnd(text, 1)) )
			text = removeEnd(text, 1) + 'i';
		return text;
	} // step_1()

	// Tidies a stem left by removing "ed"/"ing": at/bl/iz get an "e",
	// a doubled consonant other than l, s, z is undoubled, and a short
	// consonant-vowel-consonant stem gets an "e".
	private static String repairStep1bStem ( String stem ) {
		if ( stem.endsWith("at") || stem.endsWith("bl") || stem.endsWith("iz") )
			return stem + "e";
		int n = stem.length();
		if ( n >= 2 ) {
			char end = stem.charAt(n - 1);
			// Only a doubled consonant counts; "ee" in "free" must stay.
			boolean doubled = end == stem.charAt(n - 2) && !vowelAt(stem, n - 1);
			if ( doubled && end != 'l' && end != 's' && end != 'z' )
				return removeEnd(stem, 1);
		}
		if ( measure(stem) == 1 && cvc(stem) )
			return stem + "e";
		return stem;
	}

	/**
	 * Step 2: maps double suffixes to single ones (e.g. "ational" to
	 * "ate"). The first rule whose suffix matches and whose stem has
	 * measure &gt; 0 is applied.
	 */
	public static String step2 ( String text ) {
		return applyRules(text, STEP2_RULES);
	} // step2()

	/**
	 * Step 3: handles "icate", "ative", "ful", "ness" and similar. The
	 * first rule whose suffix matches and whose stem has measure &gt; 0 is
	 * applied.
	 */
	public static String step3 ( String text ) {
		return applyRules(text, STEP3_RULES);
	} // step3

	// Applies the first { suffix, replacement } rule that matches the end
	// of the text and leaves a stem with measure > 0.
	private static String applyRules ( String text, String[][] rules ) {
		for ( String[] rule : rules ) {
			if ( text.endsWith(rule[0]) ) {
				String stem = removeEnd(text, rule[0].length());
				if ( measure(stem) > 0 )
					return stem + rule[1];
			}
		}
		return text;
	}

	/**
	 * Step 4: removes a remaining suffix such as "al", "ment" or "ize"
	 * when the stem has measure &gt; 1. "ion" is removed only after 's' or
	 * 't', and the 's'/'t' is kept ("adoption" becomes "adopt").
	 */
	public static String step4 ( String text ) {
		for ( String suffix : STEP4_SUFFIXES ) {
			if ( !text.endsWith(suffix) )
				continue;
			String stem = removeEnd(text, suffix.length());
			if ( suffix.equals("ion")
			     && !(stem.endsWith("s") || stem.endsWith("t")) )
				continue;
			if ( measure(stem) > 1 )
				return stem;
		}
		return text;
	} // step4()

	/**
	 * Step 5: drops a final "e" (when the stem has measure &gt; 1, or
	 * measure 1 and does not end consonant-vowel-consonant) and reduces a
	 * final "ll" to "l" when the word has measure &gt; 1.
	 */
	public static String step5 ( String text ) {
		if ( text.endsWith("e") ) {
			String stem = removeEnd(text, 1);
			// A trailing vowel never adds a VC pair, so measuring the
			// stem gives the same value as measuring the whole word.
			int m = measure(stem);
			if ( m > 1 || (m == 1 && !cvc(stem)) )
				text = stem;
		}
		if ( text.endsWith("ll") && measure(text) > 1 )
			text = removeEnd(text, 1);
		return text;
	} // step5()

	/**
	 * Returns the last character in a string.
	 *
	 * @throws StringIndexOutOfBoundsException if {@code text} is empty
	 */
	public static char last(String text) {
		return text.charAt(text.length() - 1);
	}

	/**
	 * Returns the number of leading characters of the array that come
	 * before the first NUL, space or other character with code 32 or
	 * less (or the array length if there is none).
	 */
	public static int realSize(char [] text) {
		int i = 0;
		while (i < text.length && text[i] > ' ')
			i++;
		return i;
	}

	/** Converts the first {@link #realSize} characters of the array to a String. */
	public static String toString(char [] text) {
		return new String(text, 0, realSize(text));
	}

	/**
	 * Returns the text without {@code chars} characters at the end.
	 *
	 * @throws StringIndexOutOfBoundsException if {@code chars} is larger
	 *         than the length of {@code text}
	 */
	public static String removeEnd(String text, int chars) {
		return text.substring(0, text.length() - chars);
	}

	/**
	 * Checks if the word ends with the given suffix and, if so, stores
	 * the part of the word before the suffix in {@code stem}.
	 *
	 * @param word   the word to test
	 * @param suffix the suffix to look for
	 * @param stem   receives the stem (see {@link #copyArray}); left
	 *               untouched when the suffix does not match
	 * @return true if {@code word} ends with {@code suffix}
	 * @throws ArrayIndexOutOfBoundsException if the stem does not fit
	 */
	public static Boolean hasSuffix ( String word, String suffix, char [] stem) {
		// endsWith, not indexOf: the suffix may also occur earlier in
		// the word (e.g. "ing" in "singing").
		if ( !word.endsWith(suffix) )
			return false;
		copyArray(removeEnd(word, suffix.length()), stem);
		return true;
	} // hasSuffix()

	/**
	 * Copies {@code source} to the start of {@code dest}, followed by a
	 * NUL terminator when there is room for one.
	 *
	 * @throws ArrayIndexOutOfBoundsException if {@code source} is longer
	 *         than {@code dest}
	 */
	public static void copyArray(String source, char [] dest) {
		int n = source.length();
		if ( n > dest.length )
			throw new ArrayIndexOutOfBoundsException(
				"stem of length " + n + " does not fit in buffer of length "
				+ dest.length);
		source.getChars(0, n, dest, 0);
		// realSize() also stops at the end of the array, so a source that
		// fills the buffer exactly needs no terminator.
		if ( n < dest.length )
			dest[n] = '\0';
	}

	/**
	 * Returns true if the text ends consonant-vowel-consonant and the
	 * final consonant is not w, x or y. Texts shorter than three
	 * characters never qualify.
	 */
	public static Boolean cvc ( String text ) {
		int length = text.length();
		if ( length < 3 )
			return false;
		char end = text.charAt(length - 1);
		if ( end == 'w' || end == 'x' || end == 'y' )
			return false;
		return !vowelAt(text, length - 1)
			&& vowelAt(text, length - 2)
			&& !vowelAt(text, length - 3);
	} // cvc()

	/**
	 * Returns true if the character is a vowel. It needs the previous
	 * character because, following Porter's definition, y is a vowel only
	 * when preceded by a consonant ("cry") and a consonant otherwise
	 * ("toy"). A preceding 'y' is itself treated as a vowel.
	 *
	 * @param ch   the (lower-case) character to classify
	 * @param prev the character before it; pass a vowel such as 'a' for
	 *             the first character of a word so a leading y counts as
	 *             a consonant
	 */
	public static Boolean vowel ( char ch, char prev ) {
		switch ( ch ) {
		case 'a':
		case 'e':
		case 'i':
		case 'o':
		case 'u':
			return true;
		case 'y':
			return !vowel(prev, '?');
		default :
			return false;
		}
	} // vowel()

	// Classifies the character at the given index, supplying the vowel
	// sentinel 'a' as predecessor of the first character.
	private static boolean vowelAt ( String text, int index ) {
		char prev = (index > 0) ? text.charAt(index - 1) : 'a';
		return vowel(text.charAt(index), prev);
	}

	/**
	 * Measure of the stem held in a buffer. Only the characters counted
	 * by {@link #realSize} are considered, so padding after the
	 * terminator does not influence the result.
	 */
	public static int measure(char [] stem) {
		return measure(toString(stem));
	}

	/**
	 * Returns the measure of a stem: the number of times a run of vowels
	 * is followed by a consonant (the m in Porter's [C](VC)^m[V] form).
	 */
	public static int measure ( String stem ) {
		int count = 0;
		boolean previousWasVowel = false;
		for ( int i = 0 ; i < stem.length() ; i++ ) {
			boolean isVowel = vowelAt(stem, i);
			if ( previousWasVowel && !isVowel )
				count++;
			previousWasVowel = isVowel;
		}
		return count;
	} // measure()

	/** Checks if the word contains a vowel (see {@link #vowel}). */
	public static Boolean containsVowel ( String word )	{
		for ( int i = 0 ; i < word.length() ; i++ ) {
			if ( vowelAt(word, i) )
				return true;
		}
		return false;
	} // containsVowel()
}