#!/usr/bin/perl -w # # stem - gene@ology.net & John Nicholson # use strict; my $word = $ARGV[0] or die "Usage: ./stem string\n"; #my $word = 'xxaxxabxxabcxxaxxbcxx'; #my $word = 'xxaabccxx'; my $lex; @{ $lex }{ qw( a ab abc bc c ) } = (); # xx.a.xx. a.b. # xx.a.xx. ab. # xx. a.b.c. # xx. a.bc. # xx. ab.c. # xx.a. xx.b.c. # xx.a. xx.bc. # xx # # 2*3*2 = 12 possible recombinations? my $seen = {}; my @fragged = stem( $word ); print join( "\n", @fragged ), "\n"; print join( "\n", map { "$_: $seen->{$_}" } keys %$seen ), "\n"; exit; ################################## # sub stem { my $string = shift; my @stemmed = (); for( sort keys %$lex ) { my $i = index $string, $_; if( $i >= 0 ) { my $pre = substr $string, 0, $i; my $post = substr $string, $i + length( $_ ), length( $string ) - length( $_ ) - $i; print "$_ ($i): ", join( '.', $pre, $_, $post ), "\n"; $seen->{$pre}++ if $pre; stem( $pre ) unless $seen->{$pre}; $seen->{$post}++ if $post; stem( $post ) unless $seen->{$post}; # unshift @stemmed, stem( $pre ) if $pre; # push @stemmed, $_; # push @stemmed, stem( $post ) if $post; } } return @stemmed; }