(.*)(\s)*/){
$pub = $1;
$found = "yes"; $done = "no";
if ($pub =~/(\s)*(.*)<\/publisher>/){ # take out end tag
$pub = $2;
$find = "doc";
# print "pub is found, looking for DOC\n";
if ($pub =~ /(\s)*(.*)(\s)*/){ #remove leading/ending spaces
$pub = $2;
}
# print "without end tag is $pub\n";
$done = "yes";
push (@pub, $pub);
}
# next;
}
$find = "doc";
}
# Publisher value that continues over more than one input line.
# Runs only while $found is true, the value is still open ($done eq "no")
# and the state flag $find is "pub". $pub already holds the text collected
# so far; the current input line is in $_.
if (($found)&& ($done eq "no") && ($find eq "pub")){
# Case 1: the closing tag is on this line -- keep what precedes it, finish
# the value and store it in @pub.
if ($_ =~ /(\s)*(.*)<\/publisher>/){ # take out end tag
$morepub = $2;
$find = "doc";
if ($morepub =~ /(\s)*(.*)(\s)*/){ #remove leading spaces
$morepub = $2;
}
# (.*) is greedy, so this drops only the final whitespace character.
if ($morepub =~ /(.*)(\s)+$/){ #including last one
$morepub = $1;
}
# Joined to the earlier text with a single space.
if ($morepub){ $pub = $pub ." ". $morepub;}
# print " found end tag\n";
# print "now: $pub\n";
$done = "yes";
push (@pub, $pub);
# next;
}
# Case 2: no closing tag yet -- append the whole trimmed line and keep going.
# Unlike case 1, no separator is inserted between $pub and the new text.
else{
$morepub = $_;
if ($morepub =~ /(\s)*(.*)(\s)*/){ #remove leading spaces
$morepub = $2;
}
if ($morepub =~ /(.*)(\s)+$/){ #including last one
$morepub = $1;
}
if ($morepub){ $pub = $pub . $morepub;}
# print "did not find end tag; adding more to pub.\n";
# next;
}
}
# State advance: once the parser is looking for "doc" it moves straight on
# to "rights" on the same input line.
if ($find eq "doc"){
$find = "rights";
}
# Rights statement: everything on the current line before
# </p></availability> is stored both in @rts and in $rights ($rights is
# examined later when the contributor list is built).
# $find moves on to "date" whether or not the pattern matched, so only this
# one line is ever tested for the rights statement.
if (($done eq "yes") && ($find eq "rights")){
if ($_ =~ /(.*)<\/p><\/availability>/){
push (@rts, $1);
$rights = $1;
$done = "yes";
}
$find = "date";
}
# Date: take the text between "author>" and "</date>" on the current line,
# then strip everything from the first '">' onward.
# Whatever ends up in $dat is pushed onto @date even when nothing matched,
# and $dat is not cleared inside this block.
if (($done eq "yes")&&($find eq "date")){
if($_ =~ /author>(.*)<\/date>/){
$working = $1;
# (.*) is greedy, so each pass cuts at the last '">' still present; the loop
# ends with only the text that preceded the first '">'.
while ($working =~/(.*)">.*/){
$working = $1;
}
$dat = $working;
}
# Fallbacks for when the first attempt produced no (true) value.
# NOTE(review): neither fallback pattern below contains a capture group, so
# $1 is not set by them and $working cannot receive the matched date text.
# The patterns look incomplete -- confirm against the original script.
if (!$dat){
if ($_ =~ /author>.*<\/date>/){
$working = $1;
while ($working =~/(.*)">.*/){
$working = $1;
}
$dat = $working;
}
# this for no authors
elsif ($_ =~ /title>.*<\/date>/){
$working = $1;
while ($working =~/(.*)">.*/){
$working = $1;
}
$dat = $working;
}
}
push (@date, $dat);
$find = "des";
}
# Description: built from the first three captures of the pattern below,
# with a single space inserted after the first one.
# The clean-up and the push onto @descr sit outside the inner "if", so when
# the pattern does not match, whatever $des already held is pushed instead.
if (($done eq "yes") && ($find eq "des")){
if ($_ =~ /.*(.*)(\s)*<\/note>(.*)<\/xref>(.*)/){
$des = $1." ".$2.$3;
}
$found = "yes"; $done = "yes";
# print "found this des:\n$2\n";
$des =~ s/ {3,}/ /g; # correct for overspacing in document
push (@descr, $des);
#next;
}
$find = "sub";
}
# Subjects: if the current line contains "- ...</item>", split the captured
# text into individual items and push each onto @subject.
# The loop peels items off the end of $work, so later items are pushed
# before earlier ones and the first item is pushed last.
# $find becomes "nada" afterwards whether or not anything was found.
if ($find eq "sub"){
$work = "";
if ($_ =~ /- (.*)<\/item>/){
$work = $1;
# NOTE(review): the pattern below contains a literal line break, but $work
# was captured with (.*), which does not match a newline -- as written the
# loop body looks unreachable. Confirm the separator against the original.
while ($work =~ /(.*)<\/item>
- (.+)/){
push (@subject, $2);
$work = $1;
}
push (@subject, $work);
#next;
}
$find = "nada";
}
}
# --- Values that do not come straight from the parsed markup ---------------

# Publisher falls back to the library when none was collected.
push @pub, "University of Tennessee Special Collections Library"
    unless @pub;

# The library is always listed as a contributor; further institutions are
# added when their name appears in the rights statement.
push @contrib, "University of Tennessee Special Collections Library, Knoxville";
push @contrib, "Frank H. McClung Museum (Knoxville, Tenn.)"
    if $rights =~ /(.*) McClung Museum(.*)/;
push @contrib, "Knox County Public Library (Knoxville, Tenn.)"
    if $rights =~ /(.*)McClung Historical Collection(.*)/;
push @contrib, "Memphis Public Library (Memphis, Tenn.)"
    if $rights =~ /.*Memphis Public Library.*/;
push @contrib, "Tennessee State Library and Archives (Nashville, Tenn.)"
    if $rights =~ /.*Tennessee State Library.*/;

# Identifier is the public URL derived from $spc_id.
$id = "http://oai.sunsite.utk.edu/sgm/$spc_id.html";
push @ids, $id;

# Fixed values. Language and relation are appended to their lists;
# @type is replaced outright.
push @lang, "en";
push @rel, "Mode of Access: World Wide Web";
@type = ("Image", "Text");
# $check = 0;
# print "\nHere are all the fields for the record that you entered:\n\n";
# if(@title){print "title:"; foreach(@title){print " ".$_."\n";}}
# else {$check ++;}
# if(@creator){print "\ncreator:"; foreach(@creator){print " ".$_."\n";}}
# else {$check ++;}
# if(@subject){print "\nsubject:"; foreach(@subject){print " ".$_."\n";}}
# else {$check ++;}
# if(@descr){print "\ndescription:"; foreach(@descr){print " ".$_."\n";}}
# else {$check ++;}
# if(@pub){print "\npublisher:"; foreach(@pub){print " ".$_."\n";}}
# else {$check ++;}
# if(@contrib){print "\ncontributor:"; foreach(@contrib){print " ".$_."\n";}}
# else {$check ++;}
# if(@date){print "\ndate:"; foreach(@date){print " ".$_."\n";}}
# else {$check ++;}
# if(@type){print "\ntype:"; foreach(@type){print " ".$_."\n";}}
# else {$check ++;}
# if(@format){print "\nformat:"; foreach(@format){print " ".$_."\n";}}
# else {$check ++;}
# if(@ids){print "\nidentifier:"; foreach(@ids){print " ".$_."\n";}}
# else {$check ++;}
# if(@source){print "\nsource:"; foreach(@source){print " ".$_."\n";}}
# else {$check ++;}
# if(@lang){print "\nlanguage:"; foreach(@lang){print " ".$_."\n";}}
# else {$check ++;}
# if(@rel){print "\nrelation:"; foreach(@rel){print " ".$_."\n";}}
# else {$check ++;}
# if(@cov){print "\ncoverage:"; foreach(@cov){print " ".$_."\n";}}
# else {$check ++;}
# if(@rts){print "\nrights:"; foreach(@rts){print " ".$_."\n";}}
# else {$check ++;}
# if ($check == 15){
# print "Sorry, we cannot enter a record with no DC values.\n";
# exit;
# }
# print "\nIf all this is correct, type y\n";
# print "TO ABORT THIS RECORD AND EXIT, type x\n";
# chop($ok = );
# while ($ok ne "y" && $ok ne "x"){
# print "Please type y or or x:\n";
# chop($ok = );
# }
# if ($ok eq "x"){
# print "\nBye-Bye!!\n";
# exit;
# }
# Build the record twice in parallel:
#   * written field by field to the THIS filehandle (opened outside this
#     excerpt), and
#   * collected in @all, later joined into $all and stored in the database.
# Long values (title, description, publisher, contributor, rights) are
# re-wrapped for the @all copy only; the THIS copy gets one line per value.
# $begin_rec, $us, $meta_header, $udc_blurb, $rec_footer and $meta_footer
# are defined outside this excerpt.
$mydate = &responseDate;
# collecting entire xml file for text blob in database
# Empty @all of anything left over from an earlier record.
while (@all){pop(@all);}
$all = "";
push (@all, $begin_rec);
push (@all, " ");
push (@all, $us.$oai_id);
push (@all, "\n");
push (@all, " ".$mydate."\n \n");
push (@all, $meta_header.$udc_blurb);
print THIS $meta_header.$udc_blurb;
# Wrapping used below: while the text has 60-75 characters followed by a
# space and then more text, that leading chunk (the match is greedy, so the
# longest that fits) is pushed as its own line and the remainder is carried
# on with a leading space.
foreach(@title){print THIS " $_\n";
$titl = "";
$titl = " ".$_;
while ($titl =~ /((.){60,75} )(.+)/){ #trying to shorten the lines
push (@all,$1."\n"); $titl =""; $titl = " ".$3;
}
$titl = $titl."\n";
push (@all, $titl);
}
foreach(@creator){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@subject){print THIS " $_\n";
push (@all, " $_\n");}
# NOTE: $des is the same global the description parser assigns; after this
# loop it holds the last wrapped remainder, not the parsed description.
foreach(@descr){print THIS " $_\n";
$des = "";
$des = " ".$_;
while ($des =~ /((.){60,75} )(.+)/){ #trying to shorten the lines
push (@all,$1."\n"); $des =""; $des = " ".$3;
}
$des = $des."\n";
push (@all, $des);
}
foreach(@pub){print THIS " $_\n";
$pb = "";
$pb = " ".$_;
while ($pb =~ /((.){60,75} )(.+)/){ #trying to shorten the lines
push (@all,$1."\n"); $pb =""; $pb = " ".$3;
}
$pb = $pb."\n";
push (@all, $pb);
}
foreach(@contrib){print THIS " $_\n";
$cont = "";
$cont = " ".$_;
while ($cont =~ /((.){60,75} )(.+)/){ #trying to shorten the lines
push (@all,$1."\n"); $cont =""; $cont = " ".$3;
}
$cont = $cont."\n";
push (@all, $cont);
}
foreach(@date){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@type){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@format){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@ids){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@source){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@lang){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@rel){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@cov){print THIS " $_\n";
push (@all, " $_\n");}
foreach(@rts){print THIS " $_\n";
$r = "";
$r = " ".$_;
while ($r =~ /((.){60,75} )(.+)/){ #trying to shorten the lines
push (@all,$1."\n"); $r =""; $r = " ".$3;
}
$r = $r."\n";
push (@all, $r);
}
# The two copies end differently: @all gets $rec_footer, THIS gets
# $meta_footer.
push (@all, $rec_footer);
print THIS $meta_footer;
close (THIS);
close (OLD_FILE);
#foreach (@all){print $_."\n";}
# Flatten the collected pieces into one string and echo it to STDOUT.
$all = join ('', @all);
print " here's my blob:\n".$all."\n";
# Store the record in the MySQL database "oai2": the whole XML blob goes
# into $xml_udc, the individual Dublin Core fields into $udc_table, a flag
# row into $mdf_table and, when $spc_id is set, the id mapping into
# $id_table. Those four variables are interpolated where SQL expects a table
# name and are defined outside this excerpt.
# A new connection is opened and closed for every record processed.
$dbh = DBI->connect ("dbi:mysql:oai2", $username, $password)
or die "Can't connect to Mysql database: ",$DBI::errstr,"\n";
# NOTE(review): $h is not assigned anywhere in the visible code, so these two
# lines do not change the attributes of $dbh. If they were meant for $dbh,
# be aware that RaiseError would turn the "or warn" cases below into fatal
# errors -- confirm the intent before changing.
$h->{PrintError} = 1;
$h->{RaiseError} = 1;
$mypath = $rightdir.$oai_id;
# From here on $mypath, $oai_id, $all and $mydate hold SQL-quoted literals
# (result of $dbh->quote), not the raw values.
$mypath = $dbh->quote($mypath);
$oai_id = $dbh->quote($oai_id);
$all = $dbh->quote($all);
$mydate = $dbh->quote($mydate);
# $sth receives the return value of do(), not a statement handle.
$sth = $dbh->do("insert $xml_udc (oai_id, datestamp, path, file) values ($oai_id,
$mydate, $mypath, $all)") or die "Can't insert xml statement: $DBI::errstr\n";
# Multi-valued fields are flattened into one "|"-separated string each and
# then quoted for direct interpolation into the SQL text.
$title = join("|", @title); $title = $dbh->quote($title);
$creator = join("|",@creator); $creator = $dbh->quote($creator);
$subject = join("|",@subject); $subject = $dbh->quote($subject);
$descr = join("|",@descr); $descr = $dbh->quote($descr);
$pub = join("|",@pub); $pub = $dbh->quote($pub);
$contrib = join("|",@contrib); $contrib = $dbh->quote($contrib);
$date = join("|",@date); $date = $dbh->quote($date);
$type = join("|",@type); $type = $dbh->quote($type);
$format = join("|", @format); $format = $dbh->quote($format);
$ids = join("|", @ids); $ids = $dbh->quote($ids);
$source = join("|", @source); $source = $dbh->quote($source);
$lang = join("|", @lang); $lang = $dbh->quote($lang);
$rel = join("|", @rel); $rel = $dbh->quote($rel);
$cov = join("|", @cov); $cov = $dbh->quote($cov);
$rts = join("|", @rts); $rts = $dbh->quote($rts);
$sth = $dbh->do("insert $udc_table (oai_id, datestamp, title, creator, subject, description, publisher, contributor, date, type, format, identifier, source, language, relation, coverage, rights, path) values ($oai_id, $mydate, $title, $creator, $subject, $descr, $pub, $contrib, $date, $type, $format, $ids, $source, $lang, $rel, $cov, $rts, $mypath)") or die "Can't prepare sql statement: $DBI::errstr\n";
$sth = $dbh->do("insert into $mdf_table values($oai_id, 'y')") or die "Can't prepare sql statement: $DBI::errstr\n";
# The id mapping is best effort: a failure here only warns.
if ($spc_id){
$spc_id = $dbh->quote($spc_id);
$sth = $dbh->do("insert $id_table (oai_id, spc_id) values($oai_id, $spc_id)") or warn "Can't insert spc statement: $DBI::errstr\n";
}
$dbh->disconnect
or warn "Disconnection failed: $DBI::errstr\n";
} # end of if this file name fits 2 letters 3 numbers
close(THIS);
close(THAT);
} # end of looking through this directory
closedir(DIR);
exit;
sub responseDate{
    # Return the current UTC time as a datestamp of the form
    # YYYY-MM-DDThh:mm:ssZ (for example "2004-03-09T17:42:05Z").
    #
    # Takes no arguments. The string is returned and, as before, is also
    # left in the package variable $mydate for callers that read it there.
    #
    # The value is built from Perl's own gmtime() instead of parsing the
    # output of `date -u`. The old parsing picked fields by position, which
    # only works for one particular locale/platform layout of date(1); any
    # month abbreviation it did not recognise was silently turned into "12",
    # and every call forked a shell.
    #
    # The scratch values the old code left behind as package globals (@when,
    # $mymonth, $myday, $time, $myyear) are now lexical; nothing in the
    # visible part of the file reads them.
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(time);

    # gmtime() gives a 0-based month and a year counted from 1900.
    $mydate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);

    return $mydate;
}