#!/usr/local/bin/perl # # Bibgrep: Retrieve bibliographic records matching all the search words # (really regular expressions) given as arguments. Disjunctive searches can # be given as regular expressions: 'NPI|any' # # Alexis Dimitriadis 11/22/95 alexis@ling.upenn.edu # Searches are always case-insensitive. # Latex markup in the entries is removed to facilitate searching. # If the flag -w is given, only complete words are matched. # # The flag -f can be used to specify a database to search. If it is # not used, a default specified by the environment variable BIBFILE is # searched instead. Finally, a built-in default is searched. # # The algorithm depends on there being no blank lines inside any entry. # Uncomment to debug #$,=" "; #sub stamp { print ((times)[(0,1)], "\t", $_[0], "\n"); } sub stamp { print ($_[0], "\n"); } #stamp "Start" if ($opt_v); $* = 1; # Tells perl we have embedded newlines require "getopts.pl"; &Getopts("vf:w") || exit 2; # bail out if unknown flags are seen # Find the name of the database to search $BIBFILE = $ENV{BIBFILE}; $BIBFILE = $opt_f if ($opt_f); # Specify alternate database $BIBFILE_DEFAULT = $ENV{HOME} . "/lib/texinputs/linguistics.bib"; $BIBFILE = $BIBFILE_DEFAULT if (!$BIBFILE && -f $BIBFILE_DEFAULT); warn "No default bibliography database found: specify one with -f\n" unless $BIBFILE; die "Syntax: $0 [-f bibliographyfile ] searchkey [...]\n" if ($#ARGV < 0 || !$BIBFILE); # quote all pluses in search keys # (let's allow other wildcards for now) foreach $pat (@ARGV) { $pat =~ s/\+/\\+/g; } # By default, match partial words # If -w is given, match complete words only # The parentheses are there for disjunctive searches # substitution on each argument, not a search if ($opt_w) { @patterns = map("(?i)\\b($_)\\b", @ARGV); } else { @patterns = map("(?i)$_", @ARGV); } open(STDIN, $BIBFILE) || die "Can't open $BIBFILE for reading\n"; stamp "reading... " if $opt_v; # Read in entire file, clean up & separate into entries. #$_ = join("", ); # faster to undefine $/ undef $/; $_ = ; stamp "editing... " if $opt_v; s/@/\n@/g; # Ensure each /^@/ begins a paragraph # These delete trailing spaces, but keep trailing non-letters # remove completely: font changes, cedille, \cite* s/\\(((text)?it|bf|sl)|em|emph|c|cite([a-z]*))(\s+|([^a-zA-Z]))/\6/g; s/\\bemph\s*\[([^]]+)\]/\1/g; # my bracketless emphasis, \bemph[...] # Simplify by replacing with ascii approximation: s/\\([ijo])(\s+|([^a-zA-Z]))/\1\3/gi; # simplify dotless i, j, \o s/\\aa(\s+|([^a-zA-Z]))/a\2/gi; # simplify \aa s|\\["'^`~/\\]||g; # Delete accents ", \/, \\ s/[{}]//g; # delete all braces (we put some back around entries later) s/\\ / /g; # clean up \-space # The old, multi-pass expressions: (Only marginally slower, if at all) # $_ =~ s/\\(em|it|bf|c)\s+//g; # delete font changes, cedille # $_ =~ s/\\(em|it|bf|c)([^a-zA-Z])/\2/g; # ditto # $_ =~ s/\\([ij])\s+/\1/g; # simplify dotless i, j # $_ =~ s/\\([ij][^a-zA-Z])/\1/g; # ditto # # $_ =~ s|\\["'^`~/]||g; # Delete accents, \/ # # $_ =~ s/{([^{}\n]*)}/\1/g; # Delete all braces enclosing capitals etc. # $_ =~ s/{([^{}\n]*)}/\1/g; # again. # $_ =~ s/{([^{}\n]*)}/\1/g; # and again. @cites = split(/\n\s*\n+/, $_); # split on empty lines stamp "main loop..." if $opt_v; # Match each entry in turn with all search words. # (default search is an AND of all patterns). # I can't study the search pattern, but I can precompile the search # string to run faster ENTRY: foreach $entry (@cites) { foreach $pat (@patterns) { if ($entry !~ $pat) { next ENTRY; } die $@ if $@; } if ($entry =~ /\@[A-Za-z]* /) { $entry =~ s/(\@[A-Za-z]*) /\1\{ /; $entry .= "\n}"; } push(@good, $entry); } # stamp "Printing:"; $, = "\n\n"; # output record separator $\ = "\n"; # appended to print output print @good if (@good); $,=" "; $\ = ""; # stamp "Bye"; __END__