#!/usr/bin/perl -w #!/usr/bin/perl5.004/Shell.pm package Shell; use Shell qw(:find); use Cwd; use File::Find qw(finddepth); # this version does not link to GA, or to djvu images! Links are ours. # Header is Tennessee Documentary History # to use to change any TEI lite files in this directory into html format # that have not already been format: will do all in file: # type teiSgml2html (return) and wait! # code from Susan Gants, University of Georgia, GALILEO project, # tweaked by Jody DeRidder 9/06/02 opendir(DIR,"/usr/sunsite/htdocs/oai/newfiles/tdh_xml/scsgm/"); foreach $_ (readdir(DIR)){ if (($_ =~ /([0-9]*)\.sgm(l)?/) && (! -e $1."html")){ print " Formatting into html file ".$_ ."\n"; $filename = $_; ## Open input file open (TEXT, $filename) || die "cannot open $filename - $!"; ( $prefix = $filename) =~ s,(.*?)\.sgm(l)?,$1,; #### Open output file (my $outfile = $filename) =~ s,(.*?)\.sgm(l)?,$1.html,; print " outfile is $outfile; infile is $filename\n"; open (OUT, ">$outfile") || die "cannot open file - $!"; ## just a couple of style specs my $style_parms = ""; ## link colors my $body_attr = qq{bgcolor="#ffffff" text="#000000" link="#0000cc" vlink="#0000cc" alink="#cc0000"}; ## basic page layout my $hdr = qq{\n

\n\n\n
A University of Tennessee Digital Library Database
\n
\n\n\n
Tennessee Documentary History, 1796-1850
\n\n
\n}; ## values for HI REND tags # no attrib --> my %HIstarts = ( 'b' => qq{}, 'i' => qq{}, 'o' => '', 'o, b' => qq{}, 'sc' => '', # 'typewritten' => qq{[printed text: }, # 'printed text' => qq{[printed text: }, # 'printed text, i' => qq{[printed text: }, # 'printed text, sc' => qq{[printed text: }, # 'written text' => qq{[written text: }, ); my %HIends = ( 'b' => qq{}, 'i' => qq{}, 'o' => '', 'o, b' => qq{}, 'sc' => '', # 'typewritten' => qq{ ]}, # 'printed text' => qq{ ]}, # 'printed text, i' => qq{ ]}, # 'printed text, sc' => qq{ ]}, # 'written text' => qq{ ]}, ); ## values for display my %langs = ( 'lat' => 'Latin', 'fra' => 'French', 'ger' => 'German', ); #### Prompt for input file ################### #### NOTE ##### assumes pgm is run in directory containing sgm files ################### #print "\nEnter filename: \n"; # chop (my $filename = ); ## set input record delimiter to undefined, slurp in whole file undef $/; my $line = ; ## omit line ends - file is now one long string $line =~ s,\r,,g; # carriage returns from DOS $line =~ s,>\n,>,g; # "real" record ends $line =~ s,([^>])\n,$1 ,g; # mid-line $line =~ s,^\n$,,; # blank lines $line =~ s,\ \;, ,g; $line =~ s,<\?[^>]*>,,g; # xap PI's outright ## Print header print OUT ("Tennessee Documentary History\n$style_parms\n\n\n", $hdr, "\n\n,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; # FIGURE tags ----------------------------------------- $line =~ s,
(.+?)(.+?)
,
\[ Note: $2; $1 \],gi; $line =~ s,
(.+?)
,
\[ Note: $1 \],gi; $line =~ s,
(.+?)
,
\[ Note: $1 Click on page image to view \],gi; # various tags ----------------------------------------- $line =~ s,,\n,gi; $line =~ s,,\n,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,\n,gi; $line =~ s,,,gi; $line =~ s,,\n,gi; $line =~ s,,,gi; $line =~ s,,\n,gi; $line =~ s,,\n,gi; $line =~ s,,\n,gi; $line =~ s,,,gi; # All other NOTEs within TEXT ----------------------------------------- $line =~ s,(.*?), \[ Note: $1 \] ,gi; # Paragraphs, Quotes, Linegroups, Lines, Line Breaks ---------------------- $line =~ s,

,\n

",gi; $line =~ s,

,"

\n,gi; $line =~ s,,\n,gi; $line =~ s,,\n,gi; $line =~ s,,\n
,gi; $line =~ s,
,\n,gi; $line =~ s,

,\n

,gi; $line =~ s,,\n
,gi; # Page Breaks -------------------------------------- # this section does not always work, so only using one line of it if ($line =~ /()

,\n,gi; $line =~ s,,\n

Page: $2   \[jpg image\]

,gi; # Letter related tags -------------------------------------- $line =~ s,

,,gi; $line =~ s,
,,gi; $line =~ s,,
,gi; $line =~ s,
,,gi; $line =~ s,,
,gi; $line =~ s,
,,gi; $line =~ s,,\n
,gi; $line =~ s,
,,gi; $line =~ s,,\n
,gi; $line =~ s,
,,gi; $line =~ s,,\n

,gi; $line =~ s,,,gi; $line =~ s,X, X ,gi; $line =~ s,,\n
\[Signed\] ,gi; $line =~ s,,\n
\[Signed\] ,gi; $line =~ s,
,
,gi; # text change tags -------------------------------------- $line =~ s,,\[illegible\],gi; $line =~ s,(.*?),\[unclear: $1\],gi; $line =~ s,,\[$1\],gi; $line =~ s,,\[$2: $1 \],gi; $line =~ s,(.*?),$1 \[$2\],gi; $line =~ s,(.+?),$2 \[$1\],gi; $line =~ s,(.+?),$2 \[$1\],gi; $line =~ s,(.*?),$2 \[$1\],gi; $line =~ s,(.*?),$2 \[$1\],gi; $line =~ s,(.*?),\[added ($1): $2\],gi; $line =~ s,(.*?),\[added ($1): $2\],gi; $line =~ s,(.*?),\[added: $1\],gi; $line =~ s,(.*?),\[deleted ($1): $2\],gi; $line =~ s,(.*?),\[deleted: $1\],gi; $line =~ s,(.*?),\[$langs{lc($1)}: $2\],gi; $line =~ s,,,gi; $line =~ s,,,gi; #$line =~ s,(.+?),$HIstarts{lc($1)}$2 HIends{lc($1)},gi; $line =~ s,,,gi; $line =~ s,,,gi; # LIST and ITEM tags -------------------------------------- while ($line =~ //i) { # yo, is this right???? $line =~ s,,\n,gi; $line =~ s,,\n,gi; $line =~ s,,\n
,gi; $line =~ s,,
,gi; $line =~ s,
,,gi; } elsif ($line =~ /,\n,gi; $line =~ s,,\n,gi; $line =~ s,
, \n,gi; $line =~ s,,,gi; $line =~ s,,,gi; } elsif ($line =~ /,\n

    ,gi; $line =~ s,,\n
,gi; $line =~ s,,\n
  • ,gi; $line =~ s,,
  • ,gi; $line =~ s,,
  • ,gi; } # end if } # end while # XREF tag -------------------------------------- ### This creates an internal link to a second document. ### $bin would be a call to the search program, ### with 'type=doc' indicating a retrieval of an entire document, and ### tei2id=$1 being the ID of the document to be retrieved. ### I'm not sure how you'll want to handle this my $bin = ''; $line =~ s,(.+?),$1,gi; ### print out 1-line file and closing HTML tags print OUT "$line\n"; print OUT ('

    \n"); ## Process input file # drop DOCTYPE and ENTITY tags ----------------------------------------- $line =~ s,,,i; # for files with ENTITY tags $line =~ s,,,i; # for file without # clean up character entities ----------------------------------------- $line =~ s,\&hyphen\;,-,g; $line =~ s,\&ndash\;, - ,g; $line =~ s,\&mdash\;, -- ,g; $line =~ s,\&[lr]dquo\;,\",g; $line =~ s,\&[lr]squo\;,\',g; $line =~ s,\&cross\;,+,g; $line =~ s,\&dagger\;,+,g; $line =~ s,\&plus\;,+,g; $line =~ s,\&lcub\;,{,g; $line =~ s,\&rcub\;,},g; $line =~ s,\&frac38\;,3/8,g; $line =~ s,\&check\;,[checkmark],g; $line =~ s,\&equals\;,\=,g; $line =~ s,\&dollar\;,\$,g; $line =~ s,\―,
    ,g; # TEI.2 tags ----------------------------------------- $line =~ s,,Document: $1,i; if ($1){$prefix = $1;} $line =~ s,,,i; # TEIHEADER section ----------------------------------------- $line =~ s,,,i; $line =~ s,.*?,,i; $line =~ s,.*?,,i; $line =~ s,.*?,,i; # SOURCEDESC tags ----------------------------------------- if ( $line =~ m,(.*?),i ) { my $temp = $1; $temp =~ s,,,i; $temp =~ s,,\n<h3>,i; $temp =~ s,,\n,i; $temp =~ s/<\/AUTHOR>/; /ig; $temp =~ s,(.*?),\n
    author: $1,i; $temp =~ s,(.*?),\n
    publication place: $1,i; $temp =~ s,(.*?),\n
    publisher: $1,i; $temp =~ s, -- (.*?),\n
    date: $1,i; $temp =~ s,(.*?),\n
    extent: $1,i; $temp =~ s,(.*?),\n
    $1: $2,gi; $temp =~ s,
    ,,i; $line =~ s,.*?,$temp,i; } # end if $line =~ s,
    ,,i; $line =~ s,.*?,,i; $line =~ s,.*?,,i; $line =~ s,
    ,

    ,gi; # TEXT tags ----------------------------------------- $line =~ s,,,i; $line =~ s,,,i; # FRONT section ----------------------------------------- $line =~ s,,\n


    ,i; $line =~ s,,
    \n,i; $line =~ s,,

    \n,i; $line =~ s,,
    \n,i; $line =~ s,,
    \n,i; $line =~ s,,
    \n,i; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,,
    \n,i; $line =~ s,,
    \n,i; $line =~ s,,,gi; $line =~ s,,,gi; $line =~ s,(.*?),$1,i; $line =~ s,(.*?),$1,i; $line =~ s,(.*?),$1,i; $line =~ s,,
    \n,i; # BODY/BACK tags ----------------------------------------- $line =~ s,,
    \n

    ,i; $line =~ s,,\n,i; $line =~ s,,


    ,i; $line =~ s,,
    ,i; # TABLE tags ----------------------------------------- $line =~ s,,
    \n
    ,gi; $line =~ s,,
    ,gi; $line =~ s,
    ,
    \n

    ,gi; $line =~ s,,

     
    $1
    '); } closedir(DIR); }

##############################
# num(PARM)
#
# Builds the attribute text of a page-break tag: PB ID="<prefix><letter>" N="<PARM>".
#
#   PARM    - the page designation; the first bracketed number ("[3]") is used
#             if there is one, otherwise the first run of digits.
#   returns - the string  PB ID="..." N="..."  (no angle brackets).
#
# The number selects a letter suffix (1 => a, 2 => b, ... 26 => z).  A PARM
# with no digits, a 0, or a number above 26 yields no suffix.
#
# Reads the file-level $prefix, which the main loop derives from the input
# file name (and may later overwrite from a capture).
# NOTE(review): the call site is not visible in this copy of the file --
# presumably a page-break substitution; confirm before changing the format.
sub num {
    my ($parm) = @_;
    $parm = '' unless defined $parm;

    # Slot 0 is intentionally empty so that page number N maps to letter N.
    my @fig = ('', 'a' .. 'z');

    # Lexical, and reset on every call.  The earlier version kept this value
    # in the package global $a (a sort() special variable), so a PARM without
    # digits silently reused the letter left over from the previous call.
    my $suffix = '';
    my $index;
    if ($parm =~ /\[(\d+)\]/) {
        $index = $1;
    }
    elsif ($parm =~ /(\d+)/) {
        $index = $1;
    }
    if (defined $index && defined $fig[$index]) {
        $suffix = $fig[$index];
    }

    # An unset $prefix is rendered as the empty string.
    my $doc_prefix = defined $prefix ? $prefix : '';

    return qq{PB ID="$doc_prefix$suffix" N="$parm"};
}
### end program