#!/usr/bin/perl -w
use strict;
use Math::BaseCalc;

# This is just an experiment in word partioning via "lexicographical
# iteration" - a hideously slow one.

=pod {{{
16:12  * ology puzzels over the series 1, 11, 291, 16_954, 1_776_965, ... and wonders how to describe and generate it.
16:13 <@lucs> ology: You checked the integer sequences site?
16:14 <@lucs> Not found there. Hmm...
16:14 <@ology> I didn't think it would be actually.
16:15 <@lucs> Is it that weird? :)
16:15 <@ology> It is a pretty cool series though. HOAS and nopaste will show the code...
...
16:19 <@lucs> Er. could you explain that in simple words?
16:20 <@lucs> The numbers, that is.
16:20 <@lucs> 5)ab  11)a.b  for example.
16:21 <@lucs> I see what you're doing with the word, but what do those numbers represent?
16:22 <@ology> I want to find every possible partition of a word. For "a" this is just "a". For "ab" this is "a" and "a.b". For "abc" this is "a", "a.bc", "ab.c", and "a.b.c", etc.
16:22 <@lucs> Yep, I got that.
16:23 <@lucs> By the way, the number of ways to do that has been studied a lot (I think).
16:23 <@ology> I wish I could find out all the ways...
16:24 <@lucs> This considers the number of ways: http://mathworld.wolfram.com/PartitionFunctionP.html
16:24 <@ology> lucs: The numbers represent the lexicographical ordering of every possible combination of every word part with a '.' to partion.
16:25 <@ology> The numbers are just the i-th step in the iteration where a "valid" word partitioning was found.
16:25 <@ology> I skip the invalid ones.
16:25 <@Averell> are you interested in the partitions or just the number?
16:25 <@ology> Averell: I am mostly interested int he partitions, but the number is interesting.
16:25 <@lucs> Can't some recursive function get you those partitions relatively quickly?
16:26 <@ology> lucs: I have a fugly, recursive function to do it in Lingua::TokenParse.
16:26 <@Averell> thats what i was thinking
16:26 <@Averell> sorta like mergesort
16:28 <@ology> Lingua::TokenParse::build_combinations, specifically.  (http://search.cpan.org/src/GENE/Lingua-TokenParse-0.12/TokenParse.pm) Yes. I made it up myself, because I could find no partitioning functions.
16:29 <@ology> I am reading the wolfram thing now. : )
16:29 <@lucs> Here's an idea:  Consider 'abcde'. Insert a 0 between each digit: 'a0b0c0d0e'. Now generate the sequence as you would count in binary: 'a0b0c0d1e', 'a0b0c1d0e'. Then replace all '0' with '', and all '1' with '.'.
16:30 <@lucs> (I forgot some ellipses there...)
16:30 <@ology> Ha! That is exactly what I thought would be the fastest. But my brain fogged over. Excellent.
16:31 <@lucs> Note that I wouldn't actually munge the strings that way, but you get the idea.
16:34 <@ology> lucs: I don't understand. You suggested a string munging, binary counting algorithm and then said you wouldn't actually use it? 
16:34 <@lucs> ology: Exactly :)
16:34 <@lucs> Th point is to keep track of where the '.' go by counting in binary.
16:34 <@ology> yep
16:35 <@lucs> (Maybe I would use it, but there could be other, more efficient, ways of implementing it.)
16:35 <@ology> I need to find those "more efficient" ways.
16:36 <@ology> Mine sucks rocks.
16:36 <@lucs> Oh, you know, this is quite different from that Partition Function thing, come to think of it.
16:38 <@lucs> ology: Okay. Given a string like 'abcde', what do you want the function to return?
16:39 <@ology> The list of all possible partitions.
16:39 <@lucs> So, for example,  foo('ab')  would return  ('ab', 'a.b')  ? (You really want those dots?)
16:40 <@lucs> Or maybe it could return  ['ab'], ['a', 'b']  ?
16:41 <@ology> lucs: The listref is preferrable.
16:41 <@ology> I just use the dots to make it easy to glance and understand for humanoids.

=cut }}}

my $s = shift || 'abcd';  # a, ab, abc, abcd, ...
my @s = split //, $s;

my $c = Math::BaseCalc->new(digits => [ '.', @s ]);

my $i = 0;
while (1) {
    # Get the i-th partition.
    my $x = $c->to_base($i);
    # Remove the .'s for comparison below.
    (my $y = $x) =~ s/\.//g;
    # Print the iteration and partition if we are looking at an
    # entire word and the partition does not contain consecutive dots
    # or dots at the ends of the string.
    printf "%7s) %s\n", $i, $x
        if $y eq $s &&
           $x !~ /\.\.+/ && $x !~ /^\./ && $x !~ /\.$/;
    # The last partition is each element of the word with a dot
    # between.
    last if $x eq join '.', @s;
    # Increment the loop.
    $i++;
}

__END__
       1) a
~
       5) ab
      11) a.b
~
      27) abc
      75) a.bc
      99) ab.c
     291) a.b.c
~
     194) abcd
     694) a.bcd
     894) ab.cd
     954) abc.d
    3394) a.b.cd
    3454) a.bc.d
    4454) ab.c.d
   16954) a.b.c.d
~
    1865) abcde
    8345) a.bcde
   10505) ab.cde
   11045) abc.de
   11165) abcd.e
   49385) a.b.cde
   49925) a.bc.de
   50045) a.bcd.e
   62885) ab.c.de
   63005) ab.cd.e
   66245) abc.d.e
  296165) a.b.c.de
  296285) a.b.cd.e
  299525) a.bc.d.e
  377285) ab.c.d.e
 1776965) a.b.c.d.e
~
   22875) abcdef
  123717) a.bcdef
  152529) ab.cdef
  158703) abc.def
  159879) abcd.ef
  160089) abcde.f
  858423) a.b.cdef
  864597) a.bc.def
  865773) a.bcd.ef
  865983) a.bcde.f
 1066281) ab.c.def
 1067457) ab.cd.ef
 1067667) ab.cde.f
 1110675) abc.d.ef
 1110885) abc.de.f
 1119117) abcd.e.f
 6007539) a.b.c.def
 6008715) a.b.cd.ef
 6008925) a.b.cde.f
 6051933) a.bc.d.ef
 6052143) a.bc.de.f
 6060375) a.bcd.e.f
 7463721) ab.c.d.ef
 7463931) ab.c.de.f
 7472163) ab.cd.e.f
 7774689) abc.d.e.f
~
So the upper and lower bound intervals are:
    [1, 1], [5, 11], [27, 291], [194, 16_954], [1_865, 1_776_965], [22875, ?]...