# -*- perl -*-
# Eps.pm $Id: Eps.pm,v 1.9 1999/11/13 16:53:44 jens Exp $
# Imported from epsmerge =>Id: Eps.pm,v 1.15 1999/01/30 15:42:28 jens Exp<=
# (C) Copyright Jens G Jensen <jens@argaeus.ma.rhul.ac.uk>
# This file is part of epssplit and is distributed under GNU GPL


package Eps;

use strict;
use Options;


# input: filename,
sub new {
    my ($class, $fname, ) = @_;
    # An eps has boundingbox, orientation (default portrait) and a filename
    # - also knows name of save variable
    my $self = { fn => $fname, save => 'eps_Inc_Save_the_State' };
    bless $self, $class;

    unless( open(HELLO, "<$fname") ) {
	print STDERR "Creating $class: can't open $fname for reading\n";
	exit(5);
    }
    $_ = <HELLO>;
    if( /^%!PS-Adobe-(\d+)\.(\d+) EPSF-\d+\.\d+/ ) {
    }
    elsif( /^%!/ ) {
	print STDERR "Warning: this could be postscript but doesn't look like encapsulated postscript\n";
	$self->{ 'pages' } = [ ];
    }
    else {
	print STDERR "It seems that $fname is not encapsulated postscript\n";
	exit(5);
    }

    # Simple parser: parse DSCs in the header looking in particular for the boundingbox.
    # Uh -- the parser is getting a bit ugly (and long)
    # atend says: 0 = ignore trailer, 1 = must read trailer, 2 = reading trailer,

    my $atend = 0;
    # emblevel counts depth of embedded documents
    my $emblevel = 0;
    # Reading_List specifies which DSCs we are looking for and their parsed
    # values (keeping the first in the Header or the last in the Trailer).
    # Each entry has a name as the key, a regexp saying how to parse each entry of the
    # value, and sometimes a default value, and an expected length-of-list.
    my %Reading_List = (
		        'BoundingBox:' => { re => '\d+', ll => 4 },
		        'Orientation:' => { re => 'Portrait|Landscape', df => ['Portrait'], ll =>1 },
		        'LanguageLevel:' => { re => '\d+', df => ['3.0'], ll => 1 },
		        'Extensions:' => { re => '.+', },
			'Title:' => { re => '.*', df => ['(no title)'] },
			'CreationDate:' => { re => '.+', },
			'Pages:' => { re => '\d+', df => [1], ll => 1, },
			'PageOrder:' => { re => 'Ascend|Descend|Special', df => ['Ascend'], ll =>1 },
		       );
    # All but the last of these store locations in the file;
    # $name remembers the name of the DSC that we are currently reading.
    my ($end_of_header, $trailer,
	$begin_setup, $end_setup,
	$begin_prolog, $end_prolog,
	$name);
    my $ppos = 0;			# previous position, i.e., (before) beginning of current line
  PARSE:
    while( <HELLO> ) {
	--$emblevel if /^%%EndDocument/ || /^%%EndData/;
	++$emblevel if /^%%BeginDocument/ || /^%%BeginData/;
	next if $emblevel;
	# is this a %%+ continuation of the last DSC comment?
	if( /^%%\+\s*(.+)$/ ) {
	    next if $name eq 'ignore';		# continuation of a comment we ignore
	    if( !defined $name || !defined $self->{$name} ) {
		print STDERR "Unknown continuation in line $\n";
	    }
	    else {
		push @{$self->{ $name }}, split(/\s+/, $1);
	    }
	    next;
	}

	if( !defined $end_of_header && &_endofheader($_) ) {
	    $end_of_header = /^%/ ? tell HELLO : $ppos;
	}
	elsif(/^%%Trailer/) {
	    $trailer = $ppos;
	    $atend |= 2;
	    $atend &= ~1;
	    next;
	}
	elsif(/^%%BeginProlog/) {
	    $begin_prolog = $ppos;
	    $atend |= 4;
	    next;
	}
	elsif(/^%%EndProlog/) {
	    $end_prolog = tell HELLO;
	    $atend &= ~4;
	    next;
	}
# 	elsif(/^%%BeginProcSet/ || /^%%BeginResource/ || $atend & 4) {
# 	    # This stuff needed for sloppy files that don't bother to %%BeginProlog
# 	    $atend |= 4;
# 	    $begin_prolog = tell HELLO unless defined $begin_prolog;
# 	    # Get the procedure/resource
# 	    my ($what) = /^%%Begin(\w+)/;
# 	    my $proc = $_;
# 	    while(<HELLO>) {
# 		s/(^|\W)showpage(\W|$)/$1$2/;
# 		last if /^%%EndProlog/;
# 		$proc .= $_;
# 		last if defined $what && /^%%End$what/;
# 	    }
# 	    &$store_proc($proc);
# 	    redo if /^%%EndProlog/;
# 	    next;
# 	}
	elsif(/^%%BeginSetup/) {
	    $begin_setup = $ppos;
	    next;
	}
	elsif(/^%%EndSetup/) {
	    $end_setup = tell HELLO;
	    next;
	}
	last if /^%%EOF/ && !Options->new()->getopts('ignore-eof');

	# identify comment
	my ($arg, $item);
	next unless ($name, $arg) = /^%%([a-zA-Z\.\?\!]+:?)\s*(.*)$/;
	# strip leading and trailing spaces
	$arg =~ s/^\s*//; $arg =~ s/\s*$//;
	$item = $Reading_List{ $name };
	unless( defined $item ) {
	    $name = 'ignore';
	    next;
	}
	if( $arg eq '(atend)' ) {
	    if( $atend & 2 ) {
		print STDERR "$fname: Huh? Can't have (atend) ref in the trailer!\n";
		next;
	    }
	    $atend |= 1;
	    $item->{ value } = '(atend)';
	    next;
	}
	if( $arg =~ /$item->{re}/ ) {
	    # The DSC that counts is the _first_ in _headers_, _last_ in _trailers_
	    # Here the trailer one (only if available and someone prompts us to read trailer)
	    # takes precedence over the header one
	    if( $self->{ $name } && !($atend & 2) ) {
		$name = 'ignore';
		next;
	    }
	    $self->{ $name } = [ split /\s+/, $arg ];
	}
	else {
	    print STDERR "$fname: Cannot read DSC $name";
	}
    }
    continue { $ppos = tell HELLO; }	# PARSE

    # Now we've done the header but we'll read on
    # to find all the pages if this is a non-E postscript.
    if( defined $self->{'pages'} ) {
	my $p = 0;
	if( ! defined $self->{'Pages:'} ) {
	    print STDERR "Warning: postscript file didn't tell me how many pages it had\n";
	}
	seek(HELLO, $end_of_header, 0) if defined $end_of_header;
	while(<HELLO>) {
	    if( /^%%Page: *(\S+) +(\d+)/ ) {
		push @{$self->{'pages'}}, [ $1, $2, tell HELLO ];
		++$p;
	    }
	}
# 	if( defined $self->{'Pages:'} && $self->{'Pages:'}->[0] != $p ) {
# 	    printf STDERR "Warning: $fname: expected %d page%s; found %d\n", \
# 		$self->{'Pages:'}->[0], $self->{'Pages:'}->[0] != 1 ? "s" : "", $p;
# 	}
    }

    close HELLO;
    print STDERR "$fname requires a trailer but does not have one" if $atend & 1;
    if( !defined $begin_prolog && defined $end_prolog ) {
	$begin_prolog = $end_of_header;
    }
    $self->{'filepos'} = [ $end_of_header, $trailer,
			   $begin_prolog, $end_prolog,
			   $begin_setup, $end_setup,
			 ];

    # Test data
    for (keys %Reading_List) {
	print STDERR "Warning: $fname: option $_ should have been specified in trailer\n"
	    if defined $self->{$_} && $self->{$_} eq '(atend)';
    }

    # check that everything from the reading-list was defined;
    # at least those that might be used as labels *must* be def'd
    foreach (keys %Reading_List) {
	my $ref = $Reading_List{$_};
	if( defined $self->{ $_ } ) {
	    # check that we got the expected number of entries
	    if( defined $Reading_List{ll} ) {
		if($Reading_List{ll} != @{$self->{ $_}}) {
		    printf STDERR "epssplit: $fname: expected %d elements for %s found %d\n", \
			$Reading_List{ll}, $_, scalar @{$self->{ $_}};
		    if(defined $Reading_List{df}) {
			print STDERR "\t...since there is a default, I'd rather pick that\n";
			goto DEFAULT;
		    }
		}
	    }
	    next;
	}
      DEFAULT:
	# The thing was not defined, find a default
	if( defined $ref->{df} ) {
	    $self->{ $_ } = $ref->{df};
	}
	elsif( $_ eq 'CreationDate:' ) {
	    # get file's mtime (last modified)
	    my $date = (stat( $self->{fn} ))[9];
	    # if no mtime, shouldn't ever happen, use now()
	    $date = scalar localtime( $date ? $date : time() ) unless $date;
	    $self->{$_} = [ split /\s+/, $date ];
	}
    }


    if( defined $self->{'pages'} && $self->{'PageOrder:'}->[0] ne 'Special' ) {
	# sort pages on second entry (first is label, second is physical page number)
	my @list = sort { $a->[1] <=> $b->[1] } @{$self->{'pages'}};
	if( $self->{'PageOrder:'}->[0] eq 'Descend' ) {
	    $self->{'pages'} = [ reverse @list ];
	}
	else {
	    $self->{'pages'} = [ @list ];
	}
    }

    return $self;
}

# getlabel returns a label string as follows:
# f: filename
# d: the date of creation (er, of the eps file)
# T: the title of the eps file, as described by the DSC
sub getlabel {
    my $self = shift;
    $_ = shift;
    if( /f/ ) {
	return $self->{fn};
    }
    elsif( /d/ ) {
	return join( ' ', @{$self->{'CreationDate:'}} );
    }
    elsif( /T/ ) {
	return join( ' ', @{$self->{'Title:'}} );
    }
    else {
	print STDERR "$self->{fn}: Warning: unknown label specifier $_ ignored\n";
    }
    return;
}

sub box {
    my $self = shift;
    my @bbox = @{$self->{ 'BoundingBox:' }};
    my ($w, $h) = ($bbox[2]-$bbox[0], $bbox[3]-$bbox[1]);
    if( $self->{'Orientation:'}->[0] eq 'Landscape' ) {
	my $tmp = $w;
	$w = $h;
	$h = $tmp;
    }
    return ($bbox[0], $bbox[1], $bbox[0]+$w, $bbox[1]+$h);
}

sub write_prolog {
    my $self = shift;
    return unless defined $self->{'filepos'}->[2] && defined $self->{'filepos'}->[3];
    open(DATA, "<$self->{fn}") or die "Couldn't open $self->{fn}";
    # write prolog
    _write_section(\*DATA, $self->{'filepos'}->[2], $self->{'filepos'}->[3]);
    close DATA;
}


sub write_setup {
    my $self = shift;
    return unless defined $self->{'filepos'}->[4] && defined $self->{'filepos'}->[5];
    open(DATA, "<$self->{fn}") or die "Couldn't open $self->{fn}";
    _write_section(\*DATA, $self->{'filepos'}->[4], $self->{'filepos'}->[5]);
    close DATA;
}

# Write the Trailer, hopefully cleaning up after the Setup code
sub write_trailer {
    my $self = shift;
    return unless defined $self->{'filepos'}->[1];
    open(DATA, "<$self->{fn}") or die "Couldn't open $self->{fn} for reading";
    _write_section(\*DATA, $self->{'filepos'}->[1], -1);
    close DATA;
}

# The physical size of the image (on the paper); Eps has no
# changes.
sub getsize {
  shift;
  return @_;
}

# write must be passed a boundingbox (integers), plus two offset values
# shifting the printing window to another part of the Eps file, and a
# label (which will be ignored).  It writes clipping things and the EPS
# file itself.
sub write {
    my $self = shift;
    my ($llx, $lly, $urx, $ury, $ox, $oy) = @_;

    open(DATA, "<$$self{fn}") or die $!;
    print <<HERE;
/$self->{save} save def
newpath $llx $lly moveto
$llx $ury lineto $urx $ury lineto $urx $lly lineto
closepath
clip
newpath
HERE
    printf "%d %d translate\n", $llx-$ox, $lly-$oy;
    my $sc = Options->new()->getopts('sc');
    print "$sc $sc scale\n" if $sc ne 'default';
    if( $self->{ 'Orientation:' }->[0] eq 'Landscape' ) {
	# note; this only works because we scale by same amount in x and y direction
	printf "0 %d translate\n", ${$self->{'BoundingBox:'}}[2]-${$self->{'BoundingBox:'}}[0];
	print "-90 rotate\n";
    }
    printf "%d %d translate\n", -${$self->{'BoundingBox:'}}[0], -${$self->{'BoundingBox:'}}[1];

    if( defined $self->{'filepos'}->[5] ) {
	seek DATA, $self->{'filepos'}->[5], 0; # end_setup
    }
    elsif( defined $self->{'filepos'}->[3] ) {
	seek DATA, $self->{'filepos'}->[3], 0; # end_prolog
    }
    elsif( defined $self->{'filepos'}->[0] ) {
	seek DATA, $self->{'filepos'}->[0], 0; # end_of_header
    }
    else {
	# shouldn't get here...
	die "Eep!  Not even end_of_header was defined!\n";
    }

    my $emblevel = 0;
    while(<DATA>) {
	--$emblevel if /^%%EndDocument/;
	++$emblevel if /^%%BeginDocument/;
	next if $emblevel && /^%%/;	# ignore embedded documents' DSC
	next if /^%%Page/;		# ignore embedded page thingies
	s/(^|\W)showpage(\W|$)/$1$2/;	# kill (isolated) showpage commands
	if( /^%%Trailer/ ) {
	    last;
	}
	last if /^%%EOF/ && !Options->new()->getopts('ignore-eof');
	print;
    }
    print "$self->{save} restore\n";
    close DATA;
    return;
}

# Return true if the input could be the first non-header line (*not* a method!)
sub _endofheader {
    my $data = shift;
    return ($data =~ /^%%EndComments/) || ($data !~ /^%\S/);
}

# Write a section of the file, given FILE *, start, end.  Ignore comments?
# end can be nonsense (like -1), in which case we read till EOF.
# However, if start or end are undefined, then nothing happens.
sub _write_section {
    my ($fh, $start, $end) = @_;
    return unless defined $start && defined $end;
    my $pos = tell $fh;
    seek $fh, $start, 0;
    until( eof($fh) || ($end != -1 && tell $fh >= $end ) ) {
	$_ = <$fh>;
	s/(^|\W)showpage(\W|$)/$1$2/;
	print;
    }
    seek $fh, $pos, 0;
}

package EpsWrap;

# package EpsWrap is a simple Eps-wrapper; it's interface is exactly the
# same as that of Eps.  It's raison d'etre is to have several EpsWrap'es
# sharing one Eps Cell such that if the Eps cell represents a postscript
# file then all EpsWrap's represent individual pages and they all share
# pointers to the same Eps.  This saves potentially lots of memory and
# time for the first-stage parsing of the file (only done once).

# input: eps cell reference and physical page number
sub new {
    my $class = shift;
    my $data = { 'eps' => $_[0], 'page' => $_[1] };
    return bless $data, $class;
}

sub pages {
    my $self = shift;
    return $self->{'eps'}->pages();
}

sub getlabel {
    my $self = shift;
    return $self->{'eps'}->getlabel(@_);
}

sub box {
    my $self = shift;
    return $self->{'eps'}->box(@_);
}

sub write {
    my $self = shift;
    return $self->{'eps'}->write(@_, $self->{'page'});
}

1;
