#!/usr/bin/perl
#
# Interactive concordance search over the Early Science Fiction corpus.
#
# The script prompts for a search term and an output type, then scans the
# corpus one blank-line-separated record at a time.  Every sentence that
# contains the term is reported together with the record's year, author,
# title, chapter and (when present) story title.
#
# Output types (matched as substrings of the answer, as before):
#   f   - citations are written to a plain text file
#   db  - citations are written to a file as "%field:value" entries
#   anything else - citations are printed on standard output
#
# The search term is used as a Perl regular expression, not a literal string.

use strict;
use warnings;

my $corpus_file = "/home/james/scifi/corpus/scifi.corpus.data";

# Markup tags that delimit each metadata field inside a corpus record.
# NOTE(review): only the CHAP tag is confirmed by this script (its closing
# form is stripped from sentences further down).  The remaining tag names
# could not be recovered from the original script text and are presumed from
# the variable names -- verify them against the corpus file.  A tag that does
# not occur in a record simply yields an empty field.
my %tag_for = (
    auth  => 'AUTH',
    title => 'TITLE',
    story => 'STORY',
    year  => 'YEAR',
    chap  => 'CHAP',
);

# Progress notices printed to STDERR in file mode when the running hit count
# reaches exactly these values.
my %milestone_word = (
    10  => 'ten',
    50  => 'fifty',
    200 => 'two-hundred',
);

# Print $question, read one line of input via <>, and return it without its
# trailing newline.  Dies if the input ends before an answer is given.
sub read_answer {
    my ($question) = @_;
    print $question;
    my $answer = <>;
    die "Unexpected end of input\n" unless defined $answer;
    chomp $answer;    # chomp, not chop: never eat a real character
    return $answer;
}

# Return a copy of $text with newlines turned into spaces and runs of spaces
# collapsed to a single space.
sub flatten {
    my ($text) = @_;
    $text =~ s/\n/ /g;
    $text =~ s/[ ]+/ /g;
    return $text;
}

# Return the text between <$tag> and </$tag> in $record, or '' if the record
# has no such element (so a failed match can never leak a stale capture).
sub extract_field {
    my ($record, $tag) = @_;
    return $record =~ m|<\Q$tag\E>(.*?)</\Q$tag\E>| ? $1 : '';
}

# Build the one-line citation header used by the file and screen outputs.
# The story title is included only when the record has one.
sub citation_header {
    my (%field) = @_;
    return length($field{story})
        ? "$field{year}, $field{auth}, `$field{story}', $field{title}, $field{chap}"
        : "$field{year}, $field{auth}, $field{title}, $field{chap}";
}

# Write one "%field:value" entry for a matching sentence to $fh.
sub print_db_entry {
    my ($fh, $term, $sentence, %field) = @_;

    # The sort key is the term with spaces, hyphens and apostrophes removed.
    my $sortkey = $term;
    $sortkey =~ s/[ ]+//g;
    $sortkey =~ s/[\-\']//g;

    # Special case carried over unchanged: for titles matching "Hector S"
    # the " (1911)" suffix is moved out of the year and into the date field.
    my $year = $field{year};
    my $date = '';
    if ($field{title} =~ /Hector S/) {
        $year =~ s/ \(1911\)//;
        $date = '(1911)';
    }

    print {$fh}
        "%Start:\n",
        "%sortkey:$sortkey\n",
        "%headword:$term\n",
        "%pos:\n",
        "%page:\n",
        "%col:\n",
        "%chap:$field{chap}\n",
        "%year:$year\n",
        "%author:$field{auth}\n",
        "%story:$field{story}\n",
        "%mag/book:$field{title}\n",
        "%date:$date\n",
        "%place:(Project Gutenberg)\n",
        "%ed:\n",
        "%anthology:\n",
        "%section:\n",
        "%cit:$sentence\n",
        "%www:\n",
        "%def:\n",
        "%notes:\n";
    return;
}

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

open(my $corpus_fh, '<', $corpus_file)
    or die "Can't open file $corpus_file: $!\n";

my $term = read_answer("What term?\n");
# An empty pattern would make Perl reuse the last successful regex, which
# gives meaningless results, so refuse it outright.
die "No search term given\n" if $term eq '';

my $output    = read_answer("Output type? (s/f/db)\n");
my $want_file = ($output =~ /f/)  ? 1 : 0;
my $want_db   = ($output =~ /db/) ? 1 : 0;

my ($unix_fh, $unix_filename, $db_fh, $db_filename);
if ($want_file) {
    $unix_filename = read_answer("What filename for output?\n");
    open($unix_fh, '>', $unix_filename)
        or die "Can't open outfile $unix_filename: $!\n";
}
if ($want_db) {
    $db_filename = read_answer("What filename for output? (.txt)\n");
    open($db_fh, '>', $db_filename)
        or die "Can't open outfile $db_filename: $!\n";
}

# Compile the term once and fail early with a readable message if it is not
# a valid regular expression.
# NOTE(review): records are filtered case-insensitively but sentences are
# matched case-sensitively, exactly as in the original; confirm whether that
# asymmetry is intended.
my ($record_re, $sentence_re);
eval {
    $record_re   = qr/$term/i;
    $sentence_re = qr/$term/;
    1;
} or die "Invalid search pattern '$term': $@";

# A term containing a space may straddle a line break in the corpus, so such
# terms are matched against the whitespace-flattened record.
my $term_has_space = ($term =~ / /) ? 1 : 0;

print STDERR "\nSearching Early Science Fiction Corpus\n";

my $sentence_counter = 0;

{
    local $/ = "";    # paragraph mode: one blank-line-separated record per read

    RECORD:
    while (my $record = <$corpus_fh>) {
        my $flat     = flatten($record);
        my $haystack = $term_has_space ? $flat : $record;
        next RECORD unless $haystack =~ $record_re;

        my %field = map { $_ => extract_field($flat, $tag_for{$_}) }
                    keys %tag_for;
        my $header = citation_header(%field);

        # A sentence is the shortest run of text ending in '.', '?' or '!'.
        # Trailing text without terminal punctuation is ignored.
        my @sentences = $flat =~ /(.*?[.?!])/g;

        SENTENCE:
        for my $sentence (@sentences) {
            $sentence =~ s/^ //;
            next SENTENCE unless $sentence =~ $sentence_re;
            $sentence_counter++;

            # The first sentence of a record still carries the metadata
            # markup in front of it; drop everything up to the last </CHAP>.
            $sentence =~ s/.*<\/CHAP>//;

            if ($want_file) {
                if (exists $milestone_word{$sentence_counter}) {
                    print STDERR
                        "\nMore than $milestone_word{$sentence_counter}!\n\n";
                }
                print {$unix_fh} "\n$header\n$sentence\n";
            }
            elsif ($want_db) {
                print_db_entry($db_fh, $term, $sentence, %field);
            }
            else {
                print "\n$header\n$sentence\n";
            }
        }
    }
}

if ($want_db) {
    print {$db_fh} "%End:\n";
}

if ($sentence_counter == 0) {
    print "\nThere were none!\n\n";
}

close($corpus_fh);

# Buffered write errors only surface at close, so check the output handles.
if ($want_file) {
    close($unix_fh) or die "Can't close outfile $unix_filename: $!\n";
}
if ($want_db) {
    close($db_fh) or die "Can't close outfile $db_filename: $!\n";
}

# Ring the terminal bell to signal that the search has finished.
print STDERR "\cG\cG\cG\cG";