#!/usr/local/bin/perl -w
#
# style v0.5, Harry Plantinga, Nov. 8, 1998. This program may be 
# distributed and used under the terms of the Artistic License.
# 
# style: this program converts style="..." attributes in ThML documents
# to class="..." attributes and stylesheet entries.  
#
# First, style attributes that are default for an element are deleted. 
#
# Then, the if there are differences between remaining styles and the
# default styles for the element, a new stylesheet entry for the current
# document is added. 
#
# Bugs: should read and parse ThML stylesheet, not program it all in.
#
use strict;

my ($input, %styles, $sty, %seen, @id, %stylename, %stylesheet );
      # stylename gives a name to each style combination seen so far
      # so far and the class name associated with them.
my $styleid =  "s01";    #initial new class name
my $styleapp = "x01";    #initial new class suffix 
my $debug = 0;

&defaults;

while (<>) 
  { $input .= $_; }

#first delete styles from HR, verse
$input =~ s#(<(HR|verse)[^>]*)style=".*?"#$1#g;

#since footnotes are nested, handle them first, then escape them.
#then handle the rest of the document, and unescape them.

#just classify _contents_ of note
$input=~ s#(<note\s[^>]*?>)(.*?)(</note>)#$1.&escape(&classify($2)).$3#gse;
#$input=~ s#(<CITE\s[^>]*?>.*?</CITE>)#&escape(&classify($2))#gse;

# now classify rest of block-level elements
$input=~ s#(<(P|H[1-6]|attr|sectionInfo|l|l2|l3|term|def)[^>]*?>.*?</\2>)#&classify($1)#gse;
$input =~ s|&less-than;|<|g;

my $stylesheet = "<LINK rel=\"stylesheet\" type=\"text/css\" " .
                 "href=\"/css/thml098.css\">\n<STYLE type=\"text/css\">\n";
my $s;
foreach $s (sort keys %stylesheet) {
    $stylesheet .= "$s\t{ $stylesheet{$s} }\n";
}
$stylesheet .= "</STYLE>\n";
$input =~ s|(</ThML.head>)|$stylesheet$1|;

print $input;	
exit(0);


#---------------------subroutines-----------------
#defaults: load in default css element styles
sub defaults
{
  %styles = (
    "H1" => ["font-family: 'Palatino';", "font-size: 240%;",
	 "font-weight: bold;", "font-style: normal;", "text-align: center;"], 
    "H2" => ["font-family: 'Palatino';", "font-size: 180%;", 
	"font-weight: bold;", "font-style: normal;", "text-align: center;"], 
    "H3" => ["font-family: 'Palatino';", "font-size: 140%;", 
	"font-weight: bold;", "font-style: normal;", "text-align: center;"], 
    "H4" => ["font-family: 'Palatino';", "font-size: 130%;", 
	"font-weight: normal;", "font-style: italic;", "text-align: left;"], 
    "H5" => ["font-family: 'Palatino';", "font-size: 120%;", 
	"font-weight: normal;", "font-style: italic;", "text-align: left;"], 
    "H6" => ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: italic;", "text-align: left;"],
    "attr" => ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: right;"],
    "term" => ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: right;"],
    "def" => ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: right;"],
    "sectionInfo" => ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: italic;", "text-align: right;"],
    "Normal" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "P" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "HR" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "Continue" => ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "First"	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "Resume"	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "Footnote"=> ["font-family: 'Palatino';", "font-size: 100%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "CODE" 	=> ["font-family: 'Courier New';", "font-size: 100%;" ],
    "CITE" 	=> ["font-style: italic;" ],
    "SPAN" 	=> [ ],
    "l" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "l2" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "l3" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "list1" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "list2" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "list3" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "list4" 	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "listCont1"	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "listCont2"	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "listCont3"	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"],
    "listCont4"	=> ["font-family: 'Palatino';", "font-size: 110%;", 
	"font-weight: normal;", "font-style: normal;", "text-align: left;"] );
}


# classify:  this subroutine processes block-level elements. It changes
# styles to classes according to the following algorithm:
#
# (1) delete default styles for the element from all style="" attributes
# (2) delete non-default styles for the element from span styles.
# (3) if any span styles remain that apply to the entire element, merge
#     with element styles and delete span
# (4) if any spans remain that only embolden or italicize, change to <B>
#     or <I>
# (5) if styles are left for the element and 
#     (a) it has a class, create a new one with a name such as First-x01.
#         it should have old class styles + mods.
#     (b) it doesn't have a class, create one with a name such as s01.
# (6) for each remaining span with styles,
#     (a) if it has a class, modify, e.g. CITE-x01, and add to stylesheet
#     (b) if it has no class, create one, e.g. s01; add to stylesheet

sub classify
{
  $_ = shift;
# print "CLASSIFY: $_\n" if $debug;
  my ($s, $name, $first, @stylelist, @spans);
  if (m|.*<P.*<P|s) {
    die "Sanity check:  got two paragraphs!\n";
  }

# (1) delete default styles from all style elements
  my ($tag) = m|^<(\w*)|;
  my ($tagclass) = m|^<[^>]*?class="(.*?)"|;
  my $tagkey = $tag; 
  $tagkey = $tagclass if $tagclass;             #tagkey gives us defaults
  foreach $s (@{ $styles{$tagkey}}) 
    { s|$s||g; }                                #delete default styles
# print "After deleting defaults: $_\n" if $debug;

# (2) if any element styles are left over, delete from remaining styles
  my ($tagstyle) = m|^<[^>]*?style="\s*(.*?)\s*"|;
  @stylelist = split /;/, $tagstyle if $tagstyle and $tagstyle =~ /;/;
  foreach $s (@stylelist)
    { s|(.*?>.*?style="[^"]*)$s;|$1|gs; }
# print "After deleting elements from spans: $_\n";

# (3) merge whole-element span styles with element styles
  $_ = normalize($_);
  if ( m|<$tag[^>]*style=[^>]*><SPAN[^>]*style=[^>]*>[^>]*</SPAN></$tag>|) {
     print "Found element-wide style: $_\n" if $debug;
#    $debug=1;
     s|(style=".*?)(".*?)\s*style="(.*?)"|$1 $3$2|;
     print " --> changed to $_\n" if $debug;
     ($tagstyle) = m|^<[^>]*?style="\s*(.*?)\s*"|;
  }
  $_ = normalize($_);

# (4) change remaining spans that embolden or italicize to <B> or <I>
  s|<SPAN style="\s*font-weight:\s+bold;\s*">(.*?)</SPAN>|<B>$1</B>|g;
  s|<SPAN style="\s*font-style:\s+italic;\s*">(.*?)</SPAN>|<I>$1</I>|g;
# print "After adding <B> and <I>: $_\n";


# (5) if any element styles are left over, find or make a new class
  if (s|^(<[^>]*)style=".*?"|$1|) {	#if this tag had styles, delete them

# (5a): if there was a tag class
    if ($tagclass) {  
      print "Case 5a. Tag=$tag class=$tagclass.  " if $debug;
      print "Added styles: $tagstyle\n" if $debug;

      #first check if we have this class on record. If not, add.
      if (!$styles{$tagclass}->[0]) {
        print "Hmmm, seems to be a new class. Let's add it.\n" if $debug;
        my @a1 = split /;/, $tagstyle;
        my @a2;
        foreach $s (@a1) {			#for each style, 
          $s =~ s|^\s*(.*?)\s*$|$1;|;		#delete first&last spaces,
          push @{ $styles{$tagclass} }, $s;	#add to styles hash, 
        }
        $stylesheet{".".$tagclass} = $tagstyle;	#add stylesheet entry
        print "Stylesheet entry: .$tagclass { $tagstyle }\n" if $debug;

      } else {	                  #there was an existing class. Modify it.
        my $stykey = "$tagclass-$tagstyle";
        $stylename{$stykey} = $tagclass."-".$styleapp++ 
            unless $stylename{$stykey};
        $name = $stylename{$stykey};	
        my $newstyles = "";
         
        # add each default style to stylesheet entry unless 
        # it is overridden by new styles for this tag
        foreach $s (@{$styles{$tag}}) { 
          my ($stylekey) = $s =~ m|(.*?):|; 
          $newstyles .= $s . " " unless $tagstyle =~ m|$stylekey|;
        }
        $newstyles .= $tagstyle;
        $stylesheet{".".$name} = $newstyles unless $stylesheet{".".$name};
        print "Stylesheet entry 1: .$name { $newstyles }\n" if $debug;
        s|class="$tagclass"|class="$name"|;
      }
    }

# (5b): if there was no tag class
    else {				#there was no class
      print "Case 5b. Tag $tag; no class.  " if $debug;
      $stylename{$tagstyle} = $styleid++ unless $stylename{$tagstyle};
      $name = ".".$stylename{$tagstyle};
      $stylesheet{$name} = $tagstyle unless $stylesheet{$name};
      print "Stylesheet entry 2: $name { $tagstyle }\n" if $debug;
      s|>| class="$stylename{$tagstyle}">|;
    }
  }
  print "After new element class: $_\n" if $debug;


# (6) now process each enclosed span, cite, or code (styled inline element)
  if (s#(.*?)<(SPAN|CITE|CODE)#<$2#) {
    my $startstuff = $1; 
    while (s#(<(SPAN|CITE|CODE).*?</\2>.*?)<(SPAN|CITE|CODE)#<$3#s) {
      push @spans, $1;
    }
    push @spans, $_;
    $_ = $startstuff;				#build element to return
  
    my $span;
    foreach $span (@spans) {			#process each span
      if ($span =~ m/<(SPAN|CITE|CODE).*?<(SPAN|CITE|CODE)/) {
         warn "Hey -- there are two span|cite|codes in that chunk ($span)!\n"
      }
      my ($spantag) = $span =~ m|^<(\w+)|;
      print "Processing $spantag: $span\n" if $debug;
      foreach $s (@{ $styles{$spantag}}) 
        { $span =~ s|$s||g; }                   #delete default styles
      $span = &normalize($span);
      print "After deleting span defaults: $span\n" if $debug;

      my ($spanstyle) = $span =~ m|^<[^>]*style="(.*?)"|;
      my ($spanclass) = $span =~ m|^<[^>]*class="(.*?)"|;

      # now delete default styles for class $spanclass, if any
      if ($spanclass and $styles{$spanclass}->[0]) {
        foreach $s (@{ $styles{$spanclass}}) 
          { $span =~ s|$s||g; }                   #delete default styles
        $span = &normalize($span);
#       print "After deleting spanclass defaults: $span\n";
      }

      print "--------Processing $spantag:-------\n$span\n" if $debug;
      print " -style: $spanstyle\n" if $spanstyle and $debug;
      print " -class: $spanclass\n" if $spanclass and $debug;
  
      if ($spanstyle) {
        $span =~ s|^(<[^>]*)style=".*?"|$1|;	#delete span style
        if ($spanclass) {
          # case (6a): have a span with style and class.
          print "-----Span 6a-----\nStartstuff: $startstuff\n" if $debug;
          print "Processing span: $span\n" if $debug;
          print "Style=$spanstyle Class=$spanclass\n" if $debug;

          # first check if we have this class on record. If not, add.
          if (!$styles{$spanclass}->[0]) {
            print "Hmmm, seems to be a new class. Let's add it.\n" if $debug;
            my @a1 = split /;/, $spanstyle;
            my @a2;
            foreach $s (@a1) {                    #for each style, 
              $s =~ s|^\s*(.*?)\s*$|$1;|;         #delete first&last spaces,
              push @{ $styles{$spanclass} }, $s;  #add to styles hash, 
            }
            $stylesheet{".".$spanclass} = $spanstyle; #add stylesheet entry
            print "Stylesheet entry: .$spanclass { $spanstyle }\n" if $debug;
          } 
          else 	#we've already seen this class -- modify it, class-x01
          {
            my $stykey = "$spanclass-$spanstyle";
            $stylename{$stykey} = $spanclass."-".$styleapp++ 
                unless $stylename{$stykey};
            $name = $stylename{$stykey};	
            my $newstyles = "";
            print "stykey=$stykey name=$name;\n" if $debug;
          }

        } else {	#span with no span class.  Make one.
          $stylename{$spanstyle} = $styleid++ unless $stylename{$spanstyle};
          $name = ".".$stylename{$spanstyle};
          $stylesheet{$name} = $spanstyle;
          print "Stylesheet entry 3: $name { $spanstyle }\n" if $debug;
          $span =~ s|>| class="$stylename{$spanstyle}">|;
        }
      }
      print "->result $span\n" if $debug;
      $_ .= $span;
    }
    print "After processing spans: $_\n" if $debug;
  }
  $_ = normalize($_);

  print "Returning:  $_\n" if $debug;
  print "-----------------------------\n" if $debug;
  if (m|style=|) 
    { warn "Classify left some remaining styles...\n"; }
  return $_;
}


#escape notes so they don't get processed later
sub escape 
{
  my $stuff = shift;
  $stuff =~ s/</\&less-than;/g;
  return $stuff;
}


#remove extra spaces, etc from styles.
sub normalize
{
  my $x = shift;
  $x =~ s|  | |g;                          #compress spaces
  $x =~ s|style="\s*"||g;                  #delete empty styles
  $x =~ s|(style=")\s+([^"]*?")|$1$2|g;    #delete leading spaces
  $x =~ s|(style="[^"]*?)\s+(")|$1$2|g;    #delete trailing spaces
  $x =~ s|\s+>|>|g;                        #delete trailing tag spaces
  $x =~ s|<SPAN\s*>([^<]*)</SPAN>|$1|gs;   #delete empty SPANs
  return $x;
}
