{This unit performs simple conflation on a word} {It was written by Stuart J. Barr in 1986} {It was written in C and has been hand-translated into PASCAL} {I make no appologies for the GOTOs and other bad programming practices} {Thats C for you} {For further information, see an LSP Manual (for string manipulation procedures)} {and Porter's paper on this conflation algorithm. It will explain the code quite well.} {By Mark Sanderson} {History : 12/5/89 : Unit set up} { : 22/11/89 : Conflator found to be making mistakes} { : 24/11/89 : Checked against a paper stating PorterŐs algorithm} { : 25/11/89 : Fixed a number of bugs, one due to mistake in original code the rest due to} { : mistakes in translating C to Pascal} { : 16/3/90 : Rare bug showed up and was fixed} unit ConflationUnit; interface var stopWords: array[1..319] of string[12]; prefixes: array[1..9] of string[6]; suffixes2: array[1..22, 1..2] of string[7]; suffixes3: array[1..8, 1..2] of string[7]; suffixes4: array[1..21] of string[5]; procedure InitConflater; function StripAffixes (strng: Str255): Str255; implementation {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure InitConflater; begin stopWords[1] := 'a'; stopWords[2] := 'about'; stopWords[3] := 'above'; stopWords[4] := 'across'; stopWords[5] := 'after'; stopWords[6] := 'afterwards'; stopWords[7] := 'again'; stopWords[8] := 'against'; stopWords[9] := 'all'; stopWords[10] := 'almost'; stopWords[11] := 'alone'; stopWords[12] := 'along'; stopWords[13] := 'already'; stopWords[14] := 'also'; stopWords[15] := 'although'; stopWords[16] := 'always'; stopWords[17] := 'am'; stopWords[18] := 'among'; stopWords[19] := 'amongst'; stopWords[20] := 'amoungst'; stopWords[21] := 'amount'; stopWords[22] := 'an'; stopWords[23] := 'and'; stopWords[24] := 'another'; stopWords[25] := 'any'; stopWords[26] := 'anyhow'; stopWords[27] := 'anyone'; stopWords[28] := 'anything'; stopWords[29] := 'anyway'; stopWords[30] := 'anywhere'; stopWords[31] := 'are'; stopWords[32] := 'around'; stopWords[33] := 'as'; stopWords[34] := 'at'; stopWords[35] := 'back'; stopWords[36] := 'be'; stopWords[37] := 'became'; stopWords[38] := 'because'; stopWords[39] := 'become'; stopWords[40] := 'becomes'; stopWords[41] := 'becoming'; stopWords[42] := 'been'; stopWords[43] := 'before'; stopWords[44] := 'beforehand'; stopWords[45] := 'behind'; stopWords[46] := 'being'; stopWords[47] := 'below'; stopWords[48] := 'beside'; stopWords[49] := 'besides'; stopWords[50] := 'between'; stopWords[51] := 'beyond'; stopWords[52] := 'bill'; stopWords[53] := 'both'; stopWords[54] := 'bottom'; stopWords[55] := 'but'; stopWords[56] := 'by'; stopWords[57] := 'call'; stopWords[58] := 'can'; stopWords[59] := 'cannot'; stopWords[60] := 'cant'; stopWords[61] := 'co'; stopWords[62] := 'computer'; stopWords[63] := 'con'; stopWords[64] := 'could'; stopWords[65] := 'couldnt'; stopWords[66] := 'cry'; stopWords[67] := 'de'; stopWords[68] := 'describe'; stopWords[69] := 'detail'; stopWords[70] := 'do'; stopWords[71] := 'done'; stopWords[72] := 'down'; stopWords[73] := 'due'; stopWords[74] := 'during'; stopWords[75] := 'each'; stopWords[76] := 'eg'; stopWords[77] := 'eight'; stopWords[78] := 'either'; stopWords[79] := 'eleven'; stopWords[80] := 'else'; stopWords[81] := 'elsewhere'; stopWords[82] := 'empty'; stopWords[83] := 'enough'; stopWords[84] := 'etc'; stopWords[85] := 'even'; stopWords[86] := 'ever'; stopWords[87] := 'every'; stopWords[88] := 'everyone'; stopWords[89] := 'everything'; stopWords[90] := 'everywhere'; stopWords[91] := 'except'; stopWords[92] := 'few'; stopWords[93] := 'fifteen'; stopWords[94] := 'fify'; stopWords[95] := 'fill'; stopWords[96] := 'find'; stopWords[97] := 'fire'; stopWords[98] := 'first'; stopWords[99] := 'five'; stopWords[100] := 'for'; stopWords[101] := 'former'; stopWords[102] := 'formerly'; stopWords[103] := 'forty'; stopWords[104] := 'found'; stopWords[105] := 'four'; stopWords[106] := 'from'; stopWords[107] := 'front'; stopWords[108] := 'full'; stopWords[109] := 'further'; stopWords[110] := 'get'; stopWords[111] := 'give'; stopWords[112] := 'go'; stopWords[113] := 'had'; stopWords[114] := 'has'; stopWords[115] := 'hasnt'; stopWords[116] := 'have'; stopWords[117] := 'he'; stopWords[118] := 'hence'; stopWords[119] := 'her'; stopWords[120] := 'here'; stopWords[121] := 'hereafter'; stopWords[122] := 'hereby'; stopWords[123] := 'herein'; stopWords[124] := 'hereupon'; stopWords[125] := 'hers'; stopWords[126] := 'herself'; stopWords[127] := 'him'; stopWords[128] := 'himself'; stopWords[129] := 'his'; stopWords[130] := 'how'; stopWords[131] := 'however'; stopWords[132] := 'hundred'; stopWords[133] := 'i'; stopWords[134] := 'ie'; stopWords[135] := 'if'; stopWords[136] := 'in'; stopWords[137] := 'inc'; stopWords[138] := 'indeed'; stopWords[139] := 'interest'; stopWords[140] := 'into'; stopWords[141] := 'is'; stopWords[142] := 'it'; stopWords[143] := 'its'; stopWords[144] := 'itself'; stopWords[145] := 'keep'; stopWords[146] := 'last'; stopWords[147] := 'latter'; stopWords[148] := 'latterly'; stopWords[149] := 'least'; stopWords[150] := 'less'; stopWords[151] := 'ltd'; stopWords[152] := 'made'; stopWords[153] := 'many'; stopWords[154] := 'may'; stopWords[155] := 'me'; stopWords[156] := 'meanwhile'; stopWords[157] := 'might'; stopWords[158] := 'mill'; stopWords[159] := 'mine'; stopWords[160] := 'more'; stopWords[161] := 'moreover'; stopWords[162] := 'most'; stopWords[163] := 'mostly'; stopWords[164] := 'move'; stopWords[165] := 'much'; stopWords[166] := 'must'; stopWords[167] := 'my'; stopWords[168] := 'myself'; stopWords[169] := 'name'; stopWords[170] := 'namely'; stopWords[171] := 'neither'; stopWords[172] := 'never'; stopWords[173] := 'nevertheless'; stopWords[174] := 'next'; stopWords[175] := 'nine'; stopWords[176] := 'no'; stopWords[177] := 'nobody'; stopWords[178] := 'none'; stopWords[179] := 'noone'; stopWords[180] := 'nor'; stopWords[181] := 'not'; stopWords[182] := 'nothing'; stopWords[183] := 'now'; stopWords[184] := 'nowhere'; stopWords[185] := 'of'; stopWords[186] := 'off'; stopWords[187] := 'often'; stopWords[188] := 'on'; stopWords[189] := 'once'; stopWords[190] := 'one'; stopWords[191] := 'only'; stopWords[192] := 'onto'; stopWords[193] := 'or'; stopWords[194] := 'other'; stopWords[195] := 'others'; stopWords[196] := 'otherwise'; stopWords[197] := 'our'; stopWords[198] := 'ours'; stopWords[199] := 'ourselves'; stopWords[200] := 'out'; stopWords[201] := 'over'; stopWords[202] := 'own'; stopWords[203] := 'part'; stopWords[204] := 'per'; stopWords[205] := 'perhaps'; stopWords[206] := 'please'; stopWords[207] := 'put'; stopWords[208] := 'rather'; stopWords[209] := 're'; stopWords[210] := 'same'; stopWords[211] := 'see'; stopWords[212] := 'seem'; stopWords[213] := 'seemed'; stopWords[214] := 'seeming'; stopWords[215] := 'seems'; stopWords[216] := 'serious'; stopWords[217] := 'several'; stopWords[218] := 'she'; stopWords[219] := 'should'; stopWords[220] := 'show'; stopWords[221] := 'side'; stopWords[222] := 'since'; stopWords[223] := 'sincere'; stopWords[224] := 'six'; stopWords[225] := 'sixty'; stopWords[226] := 'so'; stopWords[227] := 'some'; stopWords[228] := 'somehow'; stopWords[229] := 'someone'; stopWords[230] := 'something'; stopWords[231] := 'sometime'; stopWords[232] := 'sometimes'; stopWords[233] := 'somewhere'; stopWords[234] := 'still'; stopWords[235] := 'such'; stopWords[236] := 'system'; stopWords[237] := 'take'; stopWords[238] := 'ten'; stopWords[239] := 'than'; stopWords[240] := 'that'; stopWords[241] := 'the'; stopWords[242] := 'their'; stopWords[243] := 'them'; stopWords[244] := 'themselves'; stopWords[245] := 'then'; stopWords[246] := 'thence'; stopWords[247] := 'there'; stopWords[248] := 'thereafter'; stopWords[249] := 'thereby'; stopWords[250] := 'therefore'; stopWords[251] := 'therein'; stopWords[252] := 'thereupon'; stopWords[253] := 'these'; stopWords[254] := 'they'; stopWords[255] := 'thick'; stopWords[256] := 'thin'; stopWords[257] := 'third'; stopWords[258] := 'this'; stopWords[259] := 'those'; stopWords[260] := 'though'; stopWords[261] := 'three'; stopWords[262] := 'through'; stopWords[263] := 'throughout'; stopWords[264] := 'thru'; stopWords[265] := 'thus'; stopWords[266] := 'to'; stopWords[267] := 'together'; stopWords[268] := 'too'; stopWords[269] := 'top'; stopWords[270] := 'toward'; stopWords[271] := 'towards'; stopWords[272] := 'twelve'; stopWords[273] := 'twenty'; stopWords[274] := 'two'; stopWords[275] := 'un'; stopWords[276] := 'under'; stopWords[277] := 'until'; stopWords[278] := 'up'; stopWords[279] := 'upon'; stopWords[280] := 'us'; stopWords[281] := 'very'; stopWords[282] := 'via'; stopWords[283] := 'was'; stopWords[284] := 'we'; stopWords[285] := 'well'; stopWords[286] := 'were'; stopWords[287] := 'what'; stopWords[288] := 'whatever'; stopWords[289] := 'when'; stopWords[290] := 'whence'; stopWords[291] := 'whenever'; stopWords[292] := 'where'; stopWords[293] := 'whereafter'; stopWords[294] := 'whereas'; stopWords[295] := 'whereby'; stopWords[296] := 'wherein'; stopWords[297] := 'whereupon'; stopWords[298] := 'wherever'; stopWords[299] := 'whether'; stopWords[300] := 'which'; stopWords[301] := 'while'; stopWords[302] := 'whither'; stopWords[303] := 'who'; stopWords[304] := 'whoever'; stopWords[305] := 'whole'; stopWords[306] := 'whom'; stopWords[307] := 'whose'; stopWords[308] := 'why'; stopWords[309] := 'will'; stopWords[310] := 'with'; stopWords[311] := 'within'; stopWords[312] := 'without'; stopWords[313] := 'would'; stopWords[314] := 'yet'; stopWords[315] := 'you'; stopWords[316] := 'your'; stopWords[317] := 'yours'; stopWords[318] := 'yourself'; stopWords[319] := 'yourselves'; prefixes[1] := 'kilo'; prefixes[2] := 'micro'; prefixes[3] := 'milli'; prefixes[4] := 'intra'; prefixes[5] := 'ultra'; prefixes[6] := 'mega'; prefixes[7] := 'nano'; prefixes[8] := 'pico'; prefixes[9] := 'pseudo'; suffixes2[1, 1] := 'ational'; suffixes2[1, 2] := 'ate'; suffixes2[2, 1] := 'tional'; suffixes2[2, 2] := 'tion'; suffixes2[3, 1] := 'enci'; suffixes2[3, 2] := 'ence'; suffixes2[4, 1] := 'anci'; suffixes2[4, 2] := 'ance'; suffixes2[5, 1] := 'izer'; suffixes2[5, 2] := 'ize'; suffixes2[6, 1] := 'iser'; suffixes2[6, 2] := 'ize'; suffixes2[7, 1] := 'abli'; suffixes2[7, 2] := 'able'; suffixes2[8, 1] := 'alli'; suffixes2[8, 2] := 'al'; suffixes2[9, 1] := 'entli'; suffixes2[9, 2] := 'ent'; suffixes2[10, 1] := 'eli'; suffixes2[10, 2] := 'e'; suffixes2[11, 1] := 'ousli'; suffixes2[11, 2] := 'ous'; suffixes2[12, 1] := 'ization'; suffixes2[12, 2] := 'ize'; suffixes2[13, 1] := 'isation'; suffixes2[13, 2] := 'ize'; suffixes2[14, 1] := 'ation'; suffixes2[14, 2] := 'ate'; suffixes2[15, 1] := 'ator'; suffixes2[15, 2] := 'ate'; suffixes2[16, 1] := 'alism'; suffixes2[16, 2] := 'al'; suffixes2[17, 1] := 'iveness'; suffixes2[17, 2] := 'ive'; suffixes2[18, 1] := 'fulness'; suffixes2[18, 2] := 'ful'; suffixes2[19, 1] := 'ousness'; suffixes2[19, 2] := 'ous'; suffixes2[20, 1] := 'aliti'; suffixes2[20, 2] := 'al'; suffixes2[21, 1] := 'iviti'; suffixes2[21, 2] := 'ive'; suffixes2[22, 1] := 'biliti'; suffixes2[22, 2] := 'ble'; suffixes3[1, 1] := 'icate'; suffixes3[1, 2] := 'ic'; suffixes3[2, 1] := 'ative'; suffixes3[2, 2] := ''; suffixes3[3, 1] := 'alize'; suffixes3[3, 2] := 'al'; suffixes3[4, 1] := 'alise'; suffixes3[4, 2] := 'al'; suffixes3[5, 1] := 'iciti'; suffixes3[5, 2] := 'ic'; suffixes3[6, 1] := 'ical'; suffixes3[6, 2] := 'ic'; suffixes3[7, 1] := 'ful'; suffixes3[7, 2] := ''; suffixes3[8, 1] := 'ness'; suffixes3[8, 2] := ''; suffixes4[1] := 'al'; suffixes4[2] := 'ance'; suffixes4[3] := 'ence'; suffixes4[4] := 'er'; suffixes4[5] := 'ic'; suffixes4[6] := 'able'; suffixes4[7] := 'ible'; suffixes4[8] := 'ant'; suffixes4[9] := 'ement'; suffixes4[10] := 'ment'; suffixes4[11] := 'ent'; suffixes4[12] := 'sion'; suffixes4[13] := 'tion'; suffixes4[14] := 'ou'; suffixes4[15] := 'ism'; suffixes4[16] := 'ate'; suffixes4[17] := 'iti'; suffixes4[18] := 'ous'; suffixes4[19] := 'ive'; suffixes4[20] := 'ize'; suffixes4[21] := 'ise'; end; {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure ToLowerCase (var kwd: Str255); var i: INTEGER; begin for i := 1 to LENGTH(kwd) do begin if kwd[i] in ['A'..'Z'] then kwd[i] := CHR(ORD(kwd[i]) + (ORD('a') - ORD('A'))); end; end; {ToLowerCase} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function IsValid (l: CHAR): BOOLEAN; begin if l in ['a'..'z'] then isValid := TRUE else if l in ['A'..'Z'] then isValid := TRUE else if l in ['0'..'9'] then isValid := TRUE else isValid := FALSE; end; {of isValid} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure Clean (var kwd: Str255); var newKwd: Str255; i: INTEGER; begin newKwd := ''; for i := 1 to LENGTH(kwd) do begin if IsValid(kwd[i]) then newKwd := MyConcat(newKwd, kwd[i]); end; kwd := newKwd; end; {of Clean} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function StopWordTest (bottom, top: INTEGER; word: Str255): BOOLEAN; var mid: INTEGER; midWord: Str255; begin if bottom > top then StopWordTest := FALSE else begin mid := ((top - bottom) div 2) + bottom; midWord := stopWords[mid]; if word < midWord then StopWordTest := StopWordTest(bottom, mid - 1, word) else if word > midWord then StopWordTest := StopWordTest(mid + 1, top, word) else StopWordTest := TRUE; end; end; {of StopWordTest} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure StripPrefixes (var strng: Str255); label 1; var i: INTEGER; begin for i := 1 to 9 do begin if Copy(strng, 1, LENGTH(prefixes[i])) = prefixes[i] then begin strng := Copy(strng, LENGTH(prefixes[i]) + 1, LENGTH(strng) - LENGTH(prefixes[i])); goto 1; end; end; 1: end; {of StripPrefixes} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function HasSuffix (word: Str255; suffix: Str255; var stem: Str255): BOOLEAN; label 1; begin if LENGTH(word) <= LENGTH(suffix) then begin HasSuffix := FALSE; goto 1; end; if LENGTH(suffix) > 1 then if word[LENGTH(word) - 1] <> suffix[LENGTH(suffix) - 1] then begin HasSuffix := FALSE; goto 1; end; stem := Copy(word, 1, LENGTH(word) - LENGTH(suffix)); if MyConcat(stem, suffix) = word then HasSuffix := TRUE else HasSuffix := FALSE; 1: end; {of HasSuffix} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function Vowel (ch, prev: CHAR): BOOLEAN; begin case ch of 'a', 'e', 'i', 'o', 'u': Vowel := TRUE; 'y': Vowel := not Vowel(prev, '?'); {Originally read É Vowel := (Vowel(prev, '?') = TRUE)} otherwise Vowel := FALSE; end; end; {of Vowel} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function Cvc (strng: Str255): BOOLEAN; var len: INTEGER; l: CHAR; begin len := LENGTH(strng); if len < 3 then Cvc := FALSE else if (Vowel(strng[len], strng[len - 1]) = FALSE) and not (strng[len] in ['w'..'y']) and (Vowel(strng[len - 1], strng[len - 2]) = TRUE) then begin if len = 3 then begin if Vowel(strng[1], '?') = FALSE then Cvc := TRUE else Cvc := FALSE; end else begin if Vowel(strng[len - 2], strng[len - 3]) = FALSE then Cvc := TRUE else Cvc := FALSE; end; end else Cvc := FALSE; end; {of Cvc} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function Measure (stem: Str255): INTEGER; label 1, 2; var i, count, len: INTEGER; begin len := LENGTH(stem); count := 0; i := 1; while i <= len do begin while i <= len do begin if i > 1 then begin if Vowel(stem[i], stem[i - 1]) = TRUE then goto 1; end else if Vowel(stem[i], '?') = TRUE then goto 1; i := i + 1; end; {of while} 1: i := i + 1; while i <= len do begin if i > 1 then begin if Vowel(stem[i], stem[i - 1]) = FALSE then goto 2; end else if Vowel(stem[i], '?') = FALSE then goto 2; i := i + 1; end; {of while} 2: if i <= len then begin count := count + 1; i := i + 1; end; end; {of while} Measure := count; end; {of Measure} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function ContainsVowel (word: Str255): BOOLEAN; var i: INTEGER; stop: BOOLEAN; begin stop := FALSE; i := 1; while (not stop) and (i <= LENGTH(word)) do begin if i > 1 then begin if Vowel(word[i], word[i - 1]) = TRUE then stop := TRUE end else if Vowel(word[i], '?') = TRUE then stop := TRUE; i := i + 1; end; ContainsVowel := stop; end; {of ContainsVowel} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure Step1 (var strng: Str255); var stem: str255; len: INTEGER; begin {Beggining of Step 1a} if strng[LENGTH(strng)] = 's' then if (HasSuffix(strng, 'sses', stem) = TRUE) then strng := Copy(strng, 1, LENGTH(strng) - 2) else if (HasSuffix(strng, 'ies', stem) = TRUE) then strng := Copy(strng, 1, LENGTH(strng) - 2) else if strng[LENGTH(strng) - 1] <> 's' then {if its an 's' then lob if off} strng := Copy(strng, 1, LENGTH(strng) - 1); {End of Step 1a} {Beggining of Step 1b} if HasSuffix(strng, 'eed', stem) = TRUE then begin if Measure(stem) > 0 then strng := Copy(strng, 1, LENGTH(strng) - 1); end else if ((HasSuffix(strng, 'ed', stem) = TRUE) or (HasSuffix(strng, 'ing', stem) = TRUE)) and (ContainsVowel(stem) = TRUE) then begin strng := Copy(strng, 1, LENGTH(stem)); if (HasSuffix(strng, 'at', stem) = TRUE) or (HasSuffix(strng, 'bl', stem) = TRUE) or (HasSuffix(strng, 'iz', stem) = TRUE) then strng := MyConcat(strng, 'e') else begin len := LENGTH(strng); if len > 1 then begin if (strng[len] = strng[len - 1]) and (strng[len] <> 'l') and (strng[len] <> 's') and (strng[len] <> 'z') then strng := Copy(strng, 1, len - 1); end else if Measure(strng) = 1 then if Cvc(strng) = TRUE then strng := MyConcat(strng, 'e'); end; end; {End of Step 1b} {Beggining of Step 1c} if HasSuffix(strng, 'y', stem) then if ContainsVowel(stem) then strng[LENGTH(strng)] := 'i'; {End of Step 1c} end; {of Step1} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure Step2 (var strng: Str255); label 1; var stem: Str255; index: INTEGER; begin for index := 1 to 22 do if HasSuffix(strng, suffixes2[index][1], stem) = TRUE then if Measure(stem) > 0 then begin strng := MyConcat(stem, suffixes2[index][2]); goto 1; end; 1: end; {of Step2} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure Step3 (var strng: Str255); label 1; var stem: Str255; index: INTEGER; begin for index := 1 to 8 do if HasSuffix(strng, suffixes3[index][1], stem) = TRUE then if Measure(stem) > 0 then begin strng := MyConcat(stem, suffixes3[index][2]); goto 1; end; 1: end; {of Step3} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure Step4 (var strng: Str255); label 1; var index: INTEGER; stem: Str255; begin for index := 1 to 21 do if HasSuffix(strng, suffixes4[index], stem) = TRUE then if Measure(stem) > 1 then begin strng := stem; goto 1; end; 1: end; {of Step4} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure Step5 (var strng: Str255); var stem: Str255; begin {Beggining of Step 5a} if strng[LENGTH(strng)] = 'e' then if Measure(strng) > 1 then strng := Copy(strng, 1, LENGTH(strng) - 1) else if Measure(strng) = 1 then begin stem := Copy(strng, 1, LENGTH(strng) - 1); if Cvc(stem) = FALSE then strng := Copy(strng, 1, LENGTH(strng) - 1); end; {End of Step 5a} {Beggining of Step 5b} if (strng[LENGTH(strng)] = 'l') and (strng[LENGTH(strng) - 1] = 'l') and (Measure(strng) > 1) then strng := Copy(strng, 1, LENGTH(strng) - 1); {End of Step 5b} end; {of Step5} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} procedure StripSuffixes (var strng: Str255); begin Step1(strng); Step2(strng); Step3(strng); Step4(strng); {Step4(strng); duplicate call removed, thanks to student@medialab.nl} Step5(strng); end; {of StripSuffixes} {°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°} function StripAffixes (strng: Str255): Str255; begin ToLowerCase(strng); Clean(strng); if (strng <> '') and (Length(strng) > 2) then if not (StopWordTest(1, 319, strng)) then begin StripPrefixes(strng); if strng <> '' then StripSuffixes(strng); end else strng := '' else strng := ''; StripAffixes := strng; end; {of StripAffixes} end.