#!/usr/bin/perl
# Program to convert simple typed-in data into Notebook input format
#
# Supports these POS's
#   n/nu/nc/vi/vt/a(dj)/adv/int(erj)/phr/prep/pron/nom/[blank]
#
# 28/8/99; updated 28/4/00
#
# Input file format, as handled by the main loop below:
#   - a line containing "CHAP <text>" sets the chapter for following records;
#   - a line containing a hyphen is a record: "<term> [<pos>] - <page>[/<col>]";
#   - anything else is reported on STDERR as an error.

use strict;
use warnings;

# State shared between the main program and the subroutines.
my $infile;                 # name of the file being converted
my $outfile;                # name of the file being written
my $out_fh;                 # output handle, opened by check_for_outfile()
my $input_exhausted = 0;    # set by ask() once <> has returned end-of-input

# Part-of-speech tags in matching priority order.  Each entry is
# [ short form, optional suffix of the long form ]; a long form such as
# "adj" is rewritten to its short form "a" before the record is parsed.
my @pos_table = (
    ['n'],
    ['n[uc]'],
    ['v[it]'],
    [ 'a', 'dj' ],
    ['phr'],
    [ 'int', 'erj' ],
    ['adv'],
    ['prep'],
    ['pron'],
    ['nom'],
);

#######################################
## Set up bibliographic details here ##
#######################################

my $start = "%Start:\n";
my $end   = "%End:";
my $chap  = "%chap:";
my $cit   = "%cit:\n%def:\n";
my $sect  = "%section:\n";

my $textyear = ask("Year of text is?\n");
my $year     = "%year:$textyear\n";

my $authname = ask("Name of author is?\n");
my $auth     = "%author:$authname\n";

my $storname = ask("Story name is?\n");
my $story    = "%story:$storname\n";

my $bmtitle = ask("Book/Mag title is?\n");
my $book    = "%mag/book:$bmtitle\n";

my $prindate = ask("Printing date is?\n");
# A bare four-digit 19xx year is wrapped in parentheses.
# NOTE(review): years from 2000 onwards are left bare - confirm this is intended.
if ($prindate =~ /^19[0-9][0-9]$/) {
    $prindate = "($prindate)";
}
my $date = "%date:$prindate\n";

my $prov  = ask("Provenance is?\n");
my $place = "%place:$prov\n";

my $edname = ask("Name of editor is?\n");
my $ed     = "%ed:$edname\n";

my $anthname = ask("Anthology title is?\n");
my $anth     = "%anthology:$anthname\n";

my $notetext = ask("Notes, if any!\n");
my $notes    = "%notes:$notetext\n";

##################
## Main Program ##
##################

$infile = ask("What file to read from?\n");
while (check_for_file($infile) == 1) {
    # Without this guard the prompt would repeat forever at end of input.
    die "\"Cannot find file $infile\"\n" if $input_exhausted;
    $infile = ask("Can't find that file. What file to read from?\n");
}

# Three-argument open: the typed name is never interpreted as a mode or pipe.
open(my $in_fh, '<', $infile) || die "\"Cannot find file $infile\"\n";

$outfile = "${infile}.in";
check_for_outfile();

print " $year $auth $story $book $date $anth\n";
my $answer = ask("Is this all correct? 
(y/n)\n");

if ($answer eq 'y') {
    print "Writing output to \"$outfile\"\n";

    my $chapnum = '';
    while (my $line = <$in_fh>) {
        # Normalise hyphens so that the term/page separator is always " - ",
        # then collapse runs of tabs and spaces to single spaces.
        $line =~ s/\- \-/ \- /g;
        $line =~ s/\-\-/\-/g;
        $line =~ s/ \-([0-9])/ \- $1/g;
        $line =~ s/([a-z])\- /$1 \- /g;
        $line =~ s/[\t]+/ /g;
        $line =~ s/[ ]+/ /g;

        if ($line =~ /CHAP/) {
            $chapnum = get_chapnum($line);
        }
        elsif ($line =~ /\-/) {
            my $parsed_input = parse_input($line);
            if ($parsed_input eq '') {
                # The record had a hyphen but no recognisable page reference.
                print STDERR "\nERROR $line\n";
                next;
            }
            print {$out_fh} $start, $parsed_input, "$chap$chapnum\n",
                $year, $auth, $story, $book, $date, $place, $ed, $anth,
                $sect, $cit, $notes;
        }
        else {
            print STDERR "\nERROR $line\n";
        }
    }
    print {$out_fh} $end;

    close($in_fh);
    # Buffered write errors only surface when the handle is closed.
    close($out_fh) || die "\"Cannot write file $outfile\"\n";
}
else {
    exit;
}

#################
## Subroutines ##
#################

# ask(QUESTION)
# Print QUESTION, read one line with <> and return it without its line
# ending.  Returns the empty string and sets $input_exhausted when there is
# no more input.
sub ask {
    my ($question) = @_;
    print $question;
    my $reply = <>;
    if (!defined $reply) {
        $input_exhausted = 1;
        return '';
    }
    chomp $reply;    # chomp, not chop: only a real line ending is removed
    return $reply;
}

# check_for_file([PATH])
# Return 1 when PATH (default: the shared $infile) does not exist, else 0.
sub check_for_file {
    my $path = @_ ? $_[0] : $infile;
    return (defined $path && -e $path) ? 0 : 1;
}

# tidy_skey(SORTKEY)
# Apply the hand-maintained sort-key corrections, in order, to a key of the
# form "<squeezed term> <pos>" and return the corrected key.
sub tidy_skey {
    my ($skey) = @_;

    # Spell out digits.  Only the first occurrence of each digit is replaced.
    # NOTE(review): confirm whether every occurrence should be converted.
    my @digit_names = qw(zero one two three four five six seven eight nine);
    for my $digit (0 .. 9) {
        $skey =~ s/$digit/$digit_names[$digit]/;
    }

    # Whole-key overrides and spelling fixes; the order is significant
    # because later rules see the result of earlier ones.
    $skey = "rim,the nom" if $skey =~ m|therim |;
    $skey =~ s/flyer/flier/;
    $skey = "inorbit phr" if $skey =~ m|orbit\,in |;
    $skey = "void n"      if $skey =~ m|thevoid |;

    # Move a trailing "ofspace" to the front of the key.
    if ($skey =~ m|ofspace$|) {
        $skey =~ s/(.*)(ofspace)/$2 $1/;
    }
    if ($skey =~ m|ofspace |) {
        $skey =~ s/(.*)(ofspace) (.*)/$2 $1/;
    }

    $skey = "belt nom"    if $skey =~ m|thebelt|;
    $skey = "belt nom"    if $skey =~ m|belt\,the|;
    $skey = "stars,the n" if $skey =~ m|stars\,the|;
    $skey = "stars,the n" if $skey =~ m|thestars|;
    $skey = "nova,go phr" if $skey =~ m|gonova|;
    $skey = "cosmicrays"
        if $skey =~ m|^cosmicray$| || $skey =~ m|^cosmicray |;
    $skey = "number"      if $skey =~ m|^number [nom]+$|;
    $skey = "anachronism" if $skey =~ m|anachronism n|;
    $skey = "exclamation" if $skey =~ m|exclamation|;

    # Reduce character entities to their base letters
    # (first ligature only; every acute/grave/circumflex).
    $skey =~ s/\&(..)lig\;/$1/;
    $skey =~ s|\&(.)acute\;|$1|g;
    $skey =~ s|\&(.)grave\;|$1|g;
    $skey =~ s|\&(.)circ\;|$1|g;

    return $skey;
}

# get_chapnum([LINE])
# Return the text following "CHAP " on LINE (default: $_), or the empty
# string when LINE has no such text.
sub get_chapnum {
    my $line = @_ ? $_[0] : $_;
    return $line =~ /CHAP (.*)/ ? $1 : '';
}

# parse_input([RECORD])
# Parse one record of the form "<term> [<pos>] - <page>[/<column>]"
# (default: $_) and return its %sortkey/%headword/%pos/%page/%column stanza.
# A record with no recognised POS tag is still accepted, with an empty pos,
# and "NO POS <term>" is written to STDERR.  Returns the empty string when no
# page reference can be found at all.
sub parse_input {
    my $record = @_ ? $_[0] : $_;
    chomp $record;

    # A "digit/digit" anywhere in the record means a column number is given.
    my $locator =
        ($record =~ m|[0-9]/[0-9]|)
        ? qr{([0-9\-]+)/([0-9]+)}
        : qr{([0-9\-]+)};

    my ($term, $pos, $page, $col);
    my $parsed = 0;

    for my $entry (@pos_table) {
        my ($short, $suffix) = @{$entry};

        my $tagged = $record =~ / ${short} \- /;
        if (!$tagged && defined $suffix) {
            $tagged = $record =~ / ${short}${suffix} \- /;
        }
        next unless $tagged;

        # Rewrite the long form of the tag (e.g. "adj") to the short one.
        if (defined $suffix) {
            $record =~ s/( ${short})${suffix}( \- )/$1$2/gi;
        }

        # Only trust the captures when the match actually succeeded.
        if ($record =~ /(.*?) (${short}) \- ${locator}/) {
            ($term, $pos, $page, $col) = ($1, $2, $3, $4);
            $parsed = 1;
            last;
        }
    }

    if (!$parsed) {
        return '' unless $record =~ /(.*?) \- ${locator}/;
        ($term, $pos, $page, $col) = ($1, '', $2, $3);
        print STDERR "NO POS $term\n";
    }
    $col = '' unless defined $col;

    # Sort key: the term with spaces, apostrophes, hyphens and full stops
    # removed, lower-cased, followed by a space and the POS tag.
    (my $squeezed = $term) =~ s/[ \'\-\.]+//g;
    $squeezed =~ tr/A-Z/a-z/;
    my $tidied_skey = tidy_skey("$squeezed $pos");

    return "%sortkey:$tidied_skey\n%headword:$term\n%pos:$pos\n"
        . "%page:$page\n%column:$col\n";
}

# check_for_outfile()
# Open the shared output handle on $outfile.  While $outfile already exists
# the user is asked whether to overwrite it; any reply not starting with
# "y" or "Y" prompts for a different file name.  Dies if the file cannot be
# opened for writing.
sub check_for_outfile {
    while (-e $outfile) {
        my $overwrite =
            ask("The file $outfile already exists. Overwrite? (y/n)");
        # Anchored, so that a reply merely containing a "y" is not consent.
        last if $overwrite =~ /^[yY]/;
        $outfile = ask("Choose another output filename\n");
    }
    open($out_fh, '>', $outfile) || die "\"Cannot open file $outfile\"\n";
    return;
}