#lang racket/base
(provide stem)
(define (string-downcase str)
(list->string
(map char-downcase
(string->list str))))
(define-syntax assert
(syntax-rules ()
((assert expr result)
(when (not (equal? expr result))
(for-each display `(
#\newline "failed assertion:" #\newline
expr #\newline "expected: " ,result
#\newline "returned: " ,expr #\newline))))))
(define (consonant? cs)
(case (car cs)
((#\a #\e #\i #\o #\u) #f)
((#\y) (if (null? (cdr cs)) #t (vowel? (cdr cs))))
(else #t)))
(define (vowel? cs) (not (consonant? cs)))
(define (measure cs)
(let loop ((m 0) (cs cs) (zs '()) (prev? #t))
(cond ((pair? cs)
(loop m (cdr cs) (cons (if (consonant? cs) #\c #\v) zs) prev?))
((null? zs) m)
((char=? (car zs) #\v) (loop m cs (cdr zs) #f))
((not prev?) (loop (+ m 1) cs (cdr zs) #t))
(else (loop m cs (cdr zs) #t)))))
(define (v-stem? cs)
(let loop ((cs cs))
(cond ((null? cs) #f)
((vowel? cs) #t)
(else (loop (cdr cs))))))
(define (dbl-cons? cs)
(cond ((or (null? cs) (null? (cdr cs))) #f)
((not (char=? (car cs) (cadr cs))) #f)
(else (consonant? cs))))
(define (cvc? cs)
(cond ((< (length cs) 3) #f)
((vowel? cs) #f)
((consonant? (cdr cs)) #f)
((vowel? (cddr cs)) #f)
((char=? (car cs) #\w) #f)
((char=? (car cs) #\x) #f)
((char=? (car cs) #\y) #f)
(else #t)))
(define (ends? cs str)
(let loop ((cs cs) (ss (reverse (string->list str))))
(cond ((null? cs) #f)
((null? ss) cs)
((char=? (car cs) (car ss))
(loop (cdr cs) (cdr ss)))
(else #f))))
(define (q str)
(lambda (cs)
(if (not (v-stem? cs))
(append (reverse (string->list str)) cs)
(cond ((ends? cs "at") => (s "ate"))
((ends? cs "bl") => (s "ble"))
((ends? cs "iz") => (s "ize"))
((and (dbl-cons? cs)
(not (member (car cs) '(#\l #\s #\z))))
(cdr cs))
((and (= (measure cs) 1) (cvc? cs))
(cons #\e cs))
(else cs)))))
(define (r str1 str2)
(lambda (cs)
(if (< 0 (measure cs))
(append (reverse (string->list str2)) cs)
(append (reverse (string->list str1)) cs))))
(define (s str)
(lambda (cs)
(append (reverse (string->list str)) cs)))
(define (t str)
(lambda (cs)
(if (< 1 (measure cs)) cs
(append (reverse (string->list str)) cs))))
(define (step1a cs) (cond ((null? cs) cs) ((null? (cdr cs)) cs)
((ends? cs "sses") => (s "ss"))
((ends? cs "ies" ) => (s "i" ))
((ends? cs "ss" ) => (s "ss"))
((ends? cs "s" ) => (lambda (cs) cs))
(else cs)))
(define (step1b cs) (cond ((null? cs) cs) ((null? (cdr cs)) cs)
((ends? cs "eed") => (r "eed" "ee"))
((ends? cs "ed" ) => (q "ed"))
((ends? cs "ing") => (q "ing"))
(else cs)))
(define (step1c cs) (cond ((null? cs) cs) ((null? (cdr cs)) cs)
((ends? cs "y") =>
(lambda (cs)
(if (v-stem? cs)
(cons #\i cs)
(cons #\y cs))))
(else cs)))
(define (step2 cs) (if (or (null? cs) (null? (cdr cs))) cs
(case (cadr cs)
((#\a) (cond ((ends? cs "ational") => (r "ational" "ate" ))
((ends? cs "tional" ) => (r "tional" "tion"))
(else cs)))
((#\c) (cond ((ends? cs "enci" ) => (r "enci" "ence"))
((ends? cs "anci" ) => (r "anci" "ance"))
(else cs)))
((#\e) (cond ((ends? cs "izer" ) => (r "izer" "ize" ))
(else cs)))
((#\g) (cond ((ends? cs "logi" ) => (r "logi" "log" ))
(else cs)))
((#\l) (cond ((ends? cs "bli" ) => (r "bli" "ble" ))
((ends? cs "alli" ) => (r "alli" "al" ))
((ends? cs "entli" ) => (r "entli" "ent" ))
((ends? cs "eli" ) => (r "eli" "e" ))
((ends? cs "ousli" ) => (r "ousli" "ous" ))
(else cs)))
((#\o) (cond ((ends? cs "ization") => (r "ization" "ize" ))
((ends? cs "ation" ) => (r "ation" "ate" ))
((ends? cs "ator" ) => (r "ator" "ate" ))
(else cs)))
((#\s) (cond ((ends? cs "alism" ) => (r "alism" "al" ))
((ends? cs "iveness") => (r "iveness" "ive" ))
((ends? cs "fulness") => (r "fulness" "ful" ))
((ends? cs "ousness") => (r "ousness" "ous" ))
(else cs)))
((#\t) (cond ((ends? cs "aliti" ) => (r "aliti" "al" ))
((ends? cs "iviti" ) => (r "iviti" "ive" ))
((ends? cs "biliti" ) => (r "biliti" "ble" ))
(else cs)))
(else cs))))
(define (step3 cs) (if (or (null? cs) (null? (cdr cs))) cs
(case (car cs)
((#\e) (cond ((ends? cs "icate" ) => (r "icate" "ic" ))
((ends? cs "ative" ) => (r "ative" "" ))
((ends? cs "alize" ) => (r "alize" "al" ))
(else cs)))
((#\i) (cond ((ends? cs "iciti" ) => (r "iciti" "ic" ))
(else cs)))
((#\l) (cond ((ends? cs "ical" ) => (r "ical" "ic" ))
((ends? cs "ful" ) => (r "ful" "" ))
(else cs)))
((#\s) (cond ((ends? cs "ness" ) => (r "ness" "" ))
(else cs)))
(else cs))))
(define (step4 cs) (if (or (null? cs) (null? (cdr cs))) cs
(case (cadr cs)
((#\a) (cond ((ends? cs "al" ) => (t "al" )) (else cs)))
((#\c) (cond ((ends? cs "ance" ) => (t "ance" ))
((ends? cs "ence" ) => (t "ence" )) (else cs)))
((#\e) (cond ((ends? cs "er" ) => (t "er" )) (else cs)))
((#\i) (cond ((ends? cs "ic" ) => (t "ic" )) (else cs)))
((#\l) (cond ((ends? cs "able" ) => (t "able" ))
((ends? cs "ible" ) => (t "ible" )) (else cs)))
((#\n) (cond ((ends? cs "ant" ) => (t "ant" ))
((ends? cs "ement" ) => (t "ement"))
((ends? cs "ment" ) => (t "ment" ))
((ends? cs "ent" ) => (t "ent" )) (else cs)))
((#\o) (cond ((ends? cs "ion" ) => (lambda (cs)
(if (and (or (char=? (car cs) #\s)
(char=? (car cs) #\t))
(< 1 (measure cs)))
cs
(append (reverse (string->list "ion")) cs))))
((ends? cs "ou" ) => (t "ou" )) (else cs)))
((#\s) (cond ((ends? cs "ism" ) => (t "ism" )) (else cs)))
((#\t) (cond ((ends? cs "ate" ) => (t "ate" ))
((ends? cs "iti" ) => (t "iti" )) (else cs)))
((#\u) (cond ((ends? cs "ous" ) => (t "ous" )) (else cs)))
((#\v) (cond ((ends? cs "ive" ) => (t "ive" )) (else cs)))
((#\z) (cond ((ends? cs "ize" ) => (t "ize" )) (else cs)))
(else cs))))
(define (step5a cs) (if (not (char=? (car cs) #\e)) cs
(let ((m (measure (cdr cs))))
(if (< 1 m) (cdr cs)
(if (and (= m 1) (not (cvc? (cdr cs)))) (cdr cs) cs)))))
(define (step5b cs) (if (and (< 1 (measure cs)) (char=? (car cs) #\l) (char=? (cadr cs) #\l))
(cdr cs) cs))
(define (stem word)
(if (< (string-length word) 3) (string-downcase word)
(list->string (reverse
(step5b (step5a (step4 (step3 (step2 (step1c (step1b (step1a
(reverse (string->list (string-downcase word)))))))))))))))
(define (test-stem)
(define (cs str) (reverse (string->list str)))
(assert (step1a (cs "caresses" )) (cs "caress" ))
(assert (step1a (cs "ponies" )) (cs "poni" ))
(assert (step1a (cs "ties" )) (cs "ti" ))
(assert (step1a (cs "caress" )) (cs "caress" ))
(assert (step1a (cs "cats" )) (cs "cat" ))
(assert (step1b (cs "feed" )) (cs "feed" ))
(assert (step1b (cs "agreed" )) (cs "agree" ))
(assert (step1b (cs "plastered" )) (cs "plaster" ))
(assert (step1b (cs "bled" )) (cs "bled" ))
(assert (step1b (cs "motoring" )) (cs "motor" ))
(assert (step1b (cs "sing" )) (cs "sing" ))
(assert (step1b (cs "conflated" )) (cs "conflate" ))
(assert (step1b (cs "troubled" )) (cs "trouble" ))
(assert (step1b (cs "sized" )) (cs "size" ))
(assert (step1b (cs "hopping" )) (cs "hop" ))
(assert (step1b (cs "tanned" )) (cs "tan" ))
(assert (step1b (cs "falling" )) (cs "fall" ))
(assert (step1b (cs "hissing" )) (cs "hiss" ))
(assert (step1b (cs "fizzed" )) (cs "fizz" ))
(assert (step1b (cs "failing" )) (cs "fail" ))
(assert (step1b (cs "filing" )) (cs "file" ))
(assert (step1c (cs "happy" )) (cs "happi" ))
(assert (step1c (cs "sky" )) (cs "sky" ))
(assert (step2 (cs "relational" )) (cs "relate" ))
(assert (step2 (cs "conditional" )) (cs "condition" ))
(assert (step2 (cs "rational" )) (cs "rational" ))
(assert (step2 (cs "valenci" )) (cs "valence" ))
(assert (step2 (cs "hesitanci" )) (cs "hesitance" ))
(assert (step2 (cs "digitizer" )) (cs "digitize" ))
(assert (step2 (cs "conformabli" )) (cs "conformable"))
(assert (step2 (cs "radicalli" )) (cs "radical" ))
(assert (step2 (cs "differentli" )) (cs "different" ))
(assert (step2 (cs "vileli" )) (cs "vile" ))
(assert (step2 (cs "analogousli" )) (cs "analogous" ))
(assert (step2 (cs "vietnamization" )) (cs "vietnamize" ))
(assert (step2 (cs "predication" )) (cs "predicate" ))
(assert (step2 (cs "operator" )) (cs "operate" ))
(assert (step2 (cs "feudalism" )) (cs "feudal" ))
(assert (step2 (cs "decisiveness" )) (cs "decisive" ))
(assert (step2 (cs "hopefulness" )) (cs "hopeful" ))
(assert (step2 (cs "callousness" )) (cs "callous" ))
(assert (step2 (cs "formaliti" )) (cs "formal" ))
(assert (step2 (cs "sensitiviti" )) (cs "sensitive" ))
(assert (step2 (cs "sensibiliti" )) (cs "sensible" ))
(assert (step3 (cs "triplicate" )) (cs "triplic" ))
(assert (step3 (cs "formative" )) (cs "form" ))
(assert (step3 (cs "formalize" )) (cs "formal" ))
(assert (step3 (cs "electriciti" )) (cs "electric" ))
(assert (step3 (cs "electrical" )) (cs "electric" ))
(assert (step3 (cs "hopeful" )) (cs "hope" ))
(assert (step3 (cs "goodness" )) (cs "good" ))
(assert (step4 (cs "revival" )) (cs "reviv" ))
(assert (step4 (cs "allowance" )) (cs "allow" ))
(assert (step4 (cs "inference" )) (cs "infer" ))
(assert (step4 (cs "airliner" )) (cs "airlin" ))
(assert (step4 (cs "gyroscopic" )) (cs "gyroscop" ))
(assert (step4 (cs "adjustable" )) (cs "adjust" ))
(assert (step4 (cs "defensible" )) (cs "defens" ))
(assert (step4 (cs "irritant" )) (cs "irrit" ))
(assert (step4 (cs "replacement" )) (cs "replac" ))
(assert (step4 (cs "adjustment" )) (cs "adjust" ))
(assert (step4 (cs "dependent" )) (cs "depend" ))
(assert (step4 (cs "adoption" )) (cs "adopt" ))
(assert (step4 (cs "homologou" )) (cs "homolog" ))
(assert (step4 (cs "communism" )) (cs "commun" ))
(assert (step4 (cs "activate" )) (cs "activ" ))
(assert (step4 (cs "angulariti" )) (cs "angular" ))
(assert (step4 (cs "homologous" )) (cs "homolog" ))
(assert (step4 (cs "effective" )) (cs "effect" ))
(assert (step4 (cs "bowdlerize" )) (cs "bowdler" ))
(assert (step5a (cs "probate" )) (cs "probat" ))
(assert (step5a (cs "rate" )) (cs "rate" ))
(assert (step5a (cs "cease" )) (cs "ceas" ))
(assert (step5b (cs "controll" )) (cs "control" ))
(assert (step5b (cs "roll" )) (cs "roll" ))
(assert (stem "relate" ) "relat" )
(assert (stem "probate" ) "probat" )
(assert (stem "conflate" ) "conflat" )
(assert (stem "pirate" ) "pirat" )
(assert (stem "prelate" ) "prelat" )
(assert (stem "derivate" ) "deriv" )
(assert (stem "activate" ) "activ" )
(assert (stem "demonstrate" ) "demonstr" )
(assert (stem "necessitate" ) "necessit" )
(assert (stem "renovate" ) "renov" )
(assert (stem "archprelate" ) "archprel" )
(assert (stem "generalizations" ) "gener" )
(assert (stem "oscillators" ) "oscil" )
)
(test-stem)