# -*- perl -*-
# Options.pm $Id: Options.pm,v 1.8 1999/07/17 16:47:08 jens Exp $
# (C) Copyright 1998-1999 Jens G Jensen <jens@argaeus.ma.rhul.ac.uk>

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# Or see http://www.gnu.org/copyleft/gpl.html




=pod

=head1 Documentation for Options.pm version $Id: Options.pm,v 1.8 1999/07/17 16:47:08 jens Exp $

=head2 What is Options.pm?

Options.pm is a package (actually a class (to be precise, a singleton class)) designed
to handle options for the epsmerge program.  However, there is nothing that binds Options.pm
to epsmerge; it may be used independently.

Options.pm has some advantages over Getopt::Long and friends.  It allows checking and
reformatting option values (through routines supplied by the programmer), it allows
default values to be specified by the programmer, or set by the user in a configuration
file.

=head2 Initializing Options

The first thing your program should do should be to call

	Options->new( vlist, \@ARGV, fname, string )

where vlist is a option/value list (see below) describing your options, their
default values, how to check them, etc, C<\@ARGV> is a reference to the array
(ususally C<@ARGV>) that you want to parse for options, and fname is the name
of the configuration file that the user (or the programmer) may want to supply.
String is an optional string argument that will be displayed before the options
summary when the function C<usage()> is called; you can use it to print out
version numbers, etc.  Inside the string, any occurrence of '$_' will be replaced
with the name of the program (as it was invoked, taken from $0).

=head2 The option/value list

The option/value list should be a list of array refs, each consisting of:

=over

=item 1

Short option name (e.g., '-o'), can be undef if not available.
It is highly recommended that short options start with a I<single> '-'
but they don't have to be just one letter (i.e., '-foo' is fine).
However, bundling short options as when calling perl is not supported
(i.e., '-baz' is not the same as '-b -a -z').  Also, short options are
always I<case sensitive>.

=item 2

Long option name (e.g., '--output-file'); can be undef.  It is
highly recommended that long options start with '--'.  When the
long options are parsed, they are I<case insensitive>.

=item 3

Value identifier a la Getopt::Long:

'=s' : mandatory string

':s' : optional string

Similarly with letters 'b' (boolean), 'i' (integer).  "" means no value.

=item 4

Optional subroutine ref, that can transform the value (e.g., length
specified in inches can be converted to cm).  This routine can also perform
sanity checks and should return undef if it doesn't like the value
that was passed to it (i.e, just a single return; statement at the end).

=item 5

Default value.  Don't make this undef.  This is the value that will be
associated with the option if the user does not type the option on the
command line I<and> has not associated a value to this option in the
configuration file.  Note that options classified (in (3) above) as
having optional parameters get a different value (based on the type of
the variable) if the user types the option on the command line but does
not supply a value.  This only applies to options with optional values;
if the value is mandatory then this supplied default value is ignored.
Also, it is I<not> tested with the subroutine from (4) (if available),
since it is assumed that you know what you are doing with this value.

=item 6

Optional help string that describes how to use this option.  If you do not
want a help string, it might be better to specify an empty string.

=back

=head2 The checking subroutine

The subroutine will be called with the following parameters:

=over

=item 1

The value corresponding to the option (usually this is the one you'd want to check.)

=item 2

The option tag (see below) (in case several options are checked by the same routine.)

=back

The subroutine should I<return> the desired value (unchanged or updated
or translated or transmogrified or what-have-you) as a I<scalar>, I<or>
return false if the value is not recognized (returning false is done by
just writing `return').  The subroutine should I<not> print anything
(error messages, or suchlike).

=head2 What is that "tag" you mention?

The tag is a string identifying the option to Options.pm itself and to the program
that uses Options.pm.  Suppose an option has long name '--gnusto-rezrov' and short
name '-gr'.  Then the tag will be the short name with the leading '-' stripped off,
i.e. 'gr'.  If there was no short name, then the tag will be the long name with the
leading '--' stripped of, in this case 'gnusto-rezrov'.  The short name will always
be preferred to generate a tag if it is available.  It is an error if there is
neither a short name nor a long name for a given option.

=head2 How to read option values

=over

=item 1

Create an instance of the Options class.

    my $opt = Options->new();

Since this is not the first time you call options (see section L<Initializing Options>),
you don't need to pass any parameters.  Moreover, you can create as many instances of
Options as you like; since the class is a singleton you will always get the same class,
and Perl will recycle them at some point after they go out of scope.

=item 2

Call the getopts method with a list of tags, and it returns the corresponding list of values
Example: if C<$opt> contains your reference from step 1,

	$opt->getopts('baz')

will get the value of the option with short name '-baz' (or, if there
was no such option, the value of the long option '--baz').  Or

	$Options->new()->getopts('foo', 'bar')

will return a list containing two items, the value of the options with
tags 'foo' and 'bar', respectively.

It is an error if you call C<getopts('quincunx')> and 'quincunx' was never specified
in the option/value list when you initialized Options.

=back

=head2 How to override user's values (or define new options on the fly):

=over

=item 1

Create an instance of the Options class.

=item 2

Call the setopts method with a list of tag => newvalue (i.e., a hash).

    Options->new()->setopts( foo => 4, bar => 'ping' );

Also available is the parseopts method:

	Options->new()->parseopts( foo => 4, bar => 'ping' );

The difference is that parseopts will send the values of the options through
the parser as if they were given on the command line.  See section L<Other stuff>
for further details.

=back

These values are not checked since the programmer is supposed to know what
(s)he is doing.

=head2 Other stuff to do with options

This version of the Options class remembers all the configuration data
that were passed to it when it was created (unless you specifically tell it
to forget all this data with the C<forget()> function call; see below).

Assuming

	$opt = Options->new();

you can call the following functions:

	$opt->usage();

will display a usage summary based on the help strings given when
Options was created.  If you had included the optional string when first
calling new(), C<usage()> will print the name of the program followed by
the string before the options summary is displayed.  The options are
displayed in the order they were defined.  As a special case, the magic
option B<-?> will display the usage information (and then exit the
program successfully).

	$opt->parseopts( ... );

takes the same arguments as setopts(), but sends each of the values through
the parse/check subroutine (just as the options read from the command line).
A warning is issued on STDERR if a value fails to pass the test and that
option will be left unchanged (or unset if it was not set before).  Note
that currently the value is not checked against the type identifier (item 3
in each option/value list); since parseopts() is called from within your
program.  The idea is to allow any transformations done by the subroutine.

	$opt->get_default_value( ... );

takes a list of tags as option (just like the getopts() function), and
returns the default values of the options that correspond to the tags,
either as specified in the configuration data that was used to construct
Options, or, possibly, overridden by data in the resource file.  For
example, if you have an option specifier in you configuration data that
looks like this:

	['-f', '--foo', ':s', \&check, 'sparkle', 'the foo option']

then the tag will be B<foo> and $opt->get_default_value('foo') will
return the string 'sparkle' (regardless of whether foo was set to
anything on the commmand line) I<unless> foo is also mentioned in the
resource file, in which case it the default value will be fetched from
there.

	$opt->forget();

This function call will make Options forget the configuration data; it will
then (eventually) be recycled by Perl's garbage collector.  You can call this
after creating Options if you know that you won't need the methods in this
section.

=head2 The configuration file

The configuration file consists of lines where each individual line may be either

=over

=item

a comment: starts with '#' or '!' and continues to the end of the line (backslash
continuation not allowed),

=item

blank,

=item

or a line consisting of either

	name=value

or

	name: value

There may be whitespaces before and after the '=' or ':'.  The name in
this case is the short or long option name with the leading '-' or '--'
stripped away (this could be, but is not necessarily, the same as the
"tag").  This method was chosen to be easier for the user to remember.
The value is checked just as a command line option would be checked.

Options looks for the configuration file in all Perl's include directories
(obtained from @INC), and, if available, in the user's home directory.

=head2 Example

Suppose your program initializes Options as follows:

	use Options;

	my $ver = 2.718281828;

	$opt1 = [ '-h', '--help', ':b', undef, 0, "Get help for my program" ];
	$opt2 = [ '-o', undef, '=s', \&check_filename, "stdout", "The name of the output file" ];
	$opt3 = [ undef, '--foo', ':i', \&check_number, -1, "foo-value" ];
	$opt4 = [ '-b', '--beep', ':i', undef, 'default', 'beep duration in milliseconds' ];

	my $opt = Options->new( [ $opt1, $opt2, $opt3, $opt4 ],
				\@ARGV,
				".foorc",
				"\$_ version $ver; usage:\n\t\$_ [opts] <files>" );

	if($opt->getopts('h')) {
	    $opt->usage();
	}

This makes Options recognise and parse the options (assuming you have defined
somewhere the hypothetical subroutines C<check_filename> and C<check_code>.)
Notice that not everybody would want help to be associated with '-h' so we
have to recognize the switch ourselves and call C<usage()> explicitly (see
output below).  However, the magical switch B<-?> will still cause the same
help message to be displayed.

Suppose further that the configuration file (".foorc") looks like this:

	# my configuration file
	o = beep.txt

and that the user calls your program (say it is called C<foo>) with the following
arguments:

	foo --foo 3 snap crackle pop

After the first call to Options->new(...), the Options class will now know the following
options and values:

	h => 0
	o => beep.txt
	foo => 3
	b => default

so that if you call

	Options->new()->getopts('o', 'foo')

you will get a list of "beep.txt" and 3.  Moreover, the remaining three string values,

	snap crackle pop

will still remain in @ARGV (because they are not be parsed as options).

Observe also that the value of the 'b' option is 'default', even if it declared
to be an integer.  The default value (in this case the string 'default') is not
checked; this can be useful since you might then later fall back to a hard-coded
value:

	$opt->setopts(b => 10) if $opt->getopts('b') eq 'default';

This will only ever be executed if the user does not specify a value on the
command line or in the config file (since the string 'default' is not a valid
integer).

Here is what a checking routine (continuing with the example above) might look
like:

	sub check_filename {
		my $fname = shift;
		$fname .= '.ext' unless $fname =~ /\.ext$/;
		if(-e ($fname . '.gz')) {
			# unzip file here
		}
		return $fname if -e $fname;
		return;		# error: file does not exist
	}

Finally, let us look at what happens when the user asks for help, or the function
C<usage()> is called from within the program (the program is still called C<foo>).
Every occurrence of '$_' inside the help string

	"\$_ version $ver; usage:\n\t\$_ [opts] <files>"

is expanded to give the name of the program; in this case, however, we had to escape
the $ with a backslash since we wanted the version variable to be interpolated.  (The
$_ is expanded only when usage() is called.)  Here is what the function produces:

	foo version 2.7182818; usage:
		foo [opts] <files>
	short    long               default  description
	-------- ------------------ -------- ------------------------------------------
	-h       --help             0        Get help for my program
	-o                          stdout   The name of the output file
		 --foo              -1       foo-value
	-b       --beep             default  beep duration in milliseconds

=cut

#`  argh! fontify happiness for some emacsen


package Options;

use strict;

use vars qw($Self @Usage_Options_Summary_Header);
use Carp;

$Self = 0;

# This variable is a list of four items; it is the only thing
# in Options.pm which is language specific.  It is used in, and
# only in, the usage() function.
@Usage_Options_Summary_Header = qw{short long default description};


sub new {
    my $class = shift;
    return $Self if ref $Self;
    $Self = _parse( @_ );
    bless $Self, $class;
    return $Self;
}

sub getopts {
    shift;
    if( wantarray ) {
	my @values = ();
	foreach (@_) {
	    my $val = $Self->{$_};
	    croak "Fatal error: Option '$_' not specified" unless defined $val;
	    push @values, $val;
	}
	return @values;
    }
    my $val = $Self->{ $_[0] };
    croak "Fatal error: option '$_[0]' not specified" unless defined $val;
    return $val;
}

# Program can "by hand" opdate or set new options to some value
sub setopts {
  shift;
  my %newstuff = @_;
  my ($k, $v);
  while( ($k, $v) = each( %newstuff ) ) {
    $Self->{ $k } = $v;
  }
}

sub parseopts {
  croak 'Options config data erased' unless defined $Self->{'- config -'};
  shift;
  my %newstuff = @_;
  my ($k, $v);
  while( ($k, $v) = each( %newstuff ) ) {
    if( defined $Self->{'- config -'}->{ $k } ) {
      my $pv = &{ $Self->{'- config -'}->{ $k }->[3] }( $v, $k );
      if( defined $pv ) {
	$v = $pv;
      }
      else {
	carp "Parsing option $v for tag $k failed; ignoring";
	next;
      }
    }
    $Self->{ $k } = $v;
  }
}

sub forget {
  undef $Self->{'- config -'};
  undef $Self->{'- order -'};
}

sub get_default_value {
    shift;
    my $spoz = $Self->{'- config -'};
    croak 'Options config data erased' unless defined $spoz;
    if( wantarray ) {
	my @values = ();
	foreach (@_) {
	    my $val = $spoz->{$_};
	    croak "Fatal error: Option '$_' not specified" unless defined $val;
	    $val = $val->[4];
	    push @values, $val;
	}
	return @values;
    }
    my $val = $Self->{ $_[0] };
    croak "Fatal error: Option '$_' not specified" unless defined $val;
    $val = $val->[4];
    return $val;
}

sub usage {
  croak 'Options config data erased' unless defined $Self->{'- config -'};
  my ($tag, @data);
  if( defined $Self->{'- usage -'} ) {
      my $ustr = $Self->{'- usage -'};
      ($_ = $0) =~ s:^.*/::;				# get filename into $_
      $ustr =~ s/\$_/$_/egs;				# repl '$_' with $_ (evaluate)
      print $ustr, "\n";
  }
  # observe: format string is precisely 79 characters (fields get truncated)
  format STDOUT =
@<<<<<<< @<<<<<<<<<<<<<<<<< @<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
@data
.
  @data = @Usage_Options_Summary_Header;
  write;
  @data = qw{-------- ------------------ -------- ------------------------------------------};
  write;
  foreach $tag ( @{$Self->{'- order -'}} ) {
    my $rec = $Self->{'- config -'}->{$tag};
    # short option, long option, help string
    @data = defined($rec->[0]) ? $rec->[0] : "";
    push @data, defined($rec->[1]) ? $rec->[1] : "";
    push @data, defined($rec->[4]) ? $rec->[4] : "";
    push @data, defined($rec->[5]) ? $rec->[5] : "";
    write;
  }
}

# _find_rc_file() tries to find the resource file; first by looking in @INC
# (should include the epsmerge .pm directories) and finally in the user's home directory.
# Um -- assumes that your path separation character is the right one ('/').
sub _find_rc_file {
    my $rcfilename = shift;
    return $rcfilename if( -e $rcfilename );
    my $path;
    foreach $path ( @INC ) {
	if( -e "$path/$rcfilename" ) {
	    return "$path/$rcfilename";
	}
    }
    if( -e "$ENV{HOME}/$rcfilename" ) {
	return "$ENV{HOME}/$rcfilename";
    }
    # print STDERR "Warning: Couldn't find resource file\n";
    return undef;
}

# Do essentially what _parse() does, except with the resource file.
# Gimme: reference to optioninfo array and the name of the resource file.
# I change optioninfo array with updated default values.
sub _parse_rc_file {
    my ($optref, $rcfilename) = @_;
    $rcfilename = _find_rc_file( $rcfilename );
    return { } unless defined $rcfilename;
    open(RCFILE, "<$rcfilename") or die "Couldn't open $rcfilename for reading\n";
    # %lookup serves as a name => data lookup as in _parse, but the names are different
    my %lookup = ( );
    foreach ( @$optref ) {
	my $name = $$_[0];	# short name
	if( defined $name ) {
	    $name =~ s/^-//;
	    $lookup{ $name } = $_;
	}
	$name = $$_[1];		# long name
	if( defined $name ) {
	    $name =~ s/^--//;
	    $lookup{ $name } = $_;
	}
    }
    while( <RCFILE> ) {
	next if /^\s*$/ || /^\s*\#/; # blank line or comment
	unless( /^\s*(\w+)\s*[=:]\s*(\S.*)$/ ) {
	    chop;
	    print STDERR "Warning: I don't understand `", $_, "' in resource file, ignoring\n";
	    next;
	}
	my $name = $1;
	my $val = $2;
	my $rec = $lookup{ $name };
	unless( $rec ) {
	    print STDERR "Warning: unknown option `$name' in resource file, ignoring\n";
	    next;
	}
	if( defined $$rec[3] ) {
	    $val = &{$$rec[3]}( $val, $$rec[-1] );
	    unless( defined $val ) {
		print STDERR "Warning: weird value for `$name' in resource file, ignoring\n";
		next;
	    }
	}
	$$rec[4] = $val;				# new default value
    }
    close RCFILE;
}

sub _parse {
    my ($optref, $argvref, $rcfilename) = @_;
    # lookup is a hash of option_name => ref_to_array
    # both for the long and the short option names
    my %lookup = ( );
    # config is for looking up configuration operation later,
    # whereas order remembers the input order for display in usage()
    # without using too much extra memory
    my $self = { '- config -' => {}, '- order -' => [] };
    # usage string
    $self->{'- usage -'} = $_[3] if defined $_[3];
    foreach ( @$optref ) {
	my $tag = $$_[0] || $$_[1];
	die "Options must have a tag" unless $tag;
	$tag =~ s/^--?//;
	push @$_, $tag;		# put tag at *end* of option record array
	$self->{'- config -'}->{$tag} = $_;
	push @{$self->{'- order -'}}, $tag;
	$lookup{ $$_[0] } = $_ if $$_[0]; # short name
	$lookup{ $$_[1] } = $_ if $$_[1]; # long name
    }
    while( @$argvref && $$argvref[0] =~ /^-/ ) {
	my $opt = shift @$argvref;
	last if $opt eq '--';
	$opt = lc( $opt ) if $opt =~ /^--/;
	# special magic option -?
	if($opt eq '-?') {
	    $Self = $self;				# pretend parsing is done
	    usage();					# call as non-method OK
	    exit;					# success
	}
	# $rec is the record describing the option
	my $rec = $lookup{ $opt };
	unless( $rec ) {
	    print STDERR "Error: can't recognize option $opt\n";
	    exit(5);
	}
	my $val;
	if( $$rec[2] =~ /^:/ ) {
	    # optional parameter; code not winning beauty contests
	  OPTIONAL:
	    {			# begin bare block
		# An optional value should not be an option name, but
		# *may* be a negative number!
		if( @$argvref && $argvref->[0] =~ /^([^-]|-\d|-\.\d)/ ) {
		    # a value; not the next option
		    $val = $argvref->[0];
		    # first, internal check; integer
		    if( $$rec[2] eq ":i" ) {
			goto DEFAULT unless $val =~ /^-?\d+$/;
		    }
		    # does check routine accept this value?
		    if( $$rec[3] ) {
			# pass to routine: value, tag
			my $tval = &{$$rec[3]}( $val, $$rec[-1] );
			if( defined $tval ) {
			    $val = $tval;
			    shift @$argvref;
			    last OPTIONAL;
			}
		    }
		    else {	# ! $$rec[3]
			shift @$argvref;
			last OPTIONAL;
		    }
		}
	      DEFAULT:
		# choose reasonable default (not necessarily the same as the specified default!)
		if( $$rec[2] eq ":s" ) {
		    $val = "";
		}
		elsif( $$rec[2] eq ":i" ) {
		    $val = 0;
		}
		elsif( $$rec[2] eq ":b" ) {
		    $val = 1;
		}
		else {
		    carp "Warning: don't know default for $$rec[2]";
		    $val = "";
		}
	    }			# end bare block
	}
	else {
	    # mandatory parameter
	    unless( @$argvref && $$argvref[0] ne '--' ) {
		print STDERR "Error: Option $opt is not followed by a value\n" ;
		exit(5);
	    }
	    $val = $$argvref[0];
	    if( $$rec[2] eq "=i" && $val !~ /-?\d+/ ) {
		print STDERR "Option $opt: $val is not an integer\n";
		exit(5);
	    }
	    if( $$rec[3] ) {
		# value *must* pass subroutine test if available
		my $tval = &{$$rec[3]}( $val, $$rec[-1] );
		unless( defined $tval ) {
		    print STDERR "Value $val for option $opt not recognized\n";
		    # print help string if available
		    print STDERR $$rec[5] if defined $$rec[5];
		    exit 2;
		}
		$val = $tval;
	    }
	    shift @$argvref;
	}
	$self->{ $$rec[-1] } = $val;
    }
    # Now check that all options are defined; those that are not get
    # values from the resource file (if available), or the supplied
    # default value.
    my $rec;
    _parse_rc_file( $optref, $rcfilename );
    foreach $rec ( @$optref ) {
	unless( defined $self->{ $$rec[-1] } ) {
	    my $val;
	    die "Every option should have a default" unless defined $$rec[4];
	    $self->{ $$rec[-1] } = $$rec[4];
	}
    }
    return $self;
}

1;
