#!/usr/local/bin/perl # # Usage: bids.to.bibtex file file ... > output # # where bids.to.bibtex is this file # # History: originally written by Jonathan Swinton (BIDS -> ? format) # modified by Anthony Stone (BIDS -> Muscatel format) # modified by Ben Bolker (BIDS -> BibTeX format) # # Handles 'short downloading format' output from BIDS, converts to BibTeX # # wish list: # - handle verbose downloading format (more or less done) # - clean up special-case (caps/proper/species/author/special) word handling slightly ... # - handle more general input/output formats # - approximate matching on journal titles/author names (using agrep) ??? # - merge rm.to.bibtex and bids.to.bibtex ... lots of common functions # - flush last article (if no blank last line) # - (possibly) try to convert " - " to ": " ? # - fix ordering of hyphenated etc. proper-words # - deal with abstracts, BIDS RF, etc. eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_]+=)(.*)/ && shift; # process any FOO=bar switches (this program doesn't need any) $[ = 1; # set array base to 1 $, = "\n"; # set output field separator $\ = "\n\n"; # set output record separator format STDOUT = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $line ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $line . @intags{'header','author','title','journal','volume','year','issue','pages','none'} = ('','au- ','ti- ','jn- ','vo- ','yr- ','is- ','pg- ',''); @outtags{'header','author','title','journal','volume','year','issue','pages','none'} = ('','author = "','title = "','journal = "','volume = "','year = "','number = "','pages ="',''); @entry{'header','author','title','journal','volume','year','issue','pages','none'} = ('','','','','','','','',''); $type = 'none'; $midsep = '",'; $endsep = '"'; $beginital = "{\\em "; $endital = "}"; $beginprotect = "{"; $endprotect = "}"; ################################################################### # functions for manipulating case etc. of incoming words: # initcaps, firstupper, lowertrivial, capwords, properwords, # speciesnames, makital, special ################################################################### sub initcaps # Capitalise initial letter of every word, lower-case the rest { local ($arg) = @_[1]; $arg =~ s/([A-Za-z])([A-Za-z]+)/\U\1\E\L\2\E/g; $arg; } sub lowercase # Lowercase everything { local ($arg) = @_[1]; $arg =~ tr/A-Z/a-z/; $arg; } sub firstupper # Capitalise initial letter of every sentence, lower-case the rest { local ($arg) = @_[1]; $arg =~ s/(.)(.*)/\U\1\E\L\2\E/; $arg =~ s/([.?!]\s+)([a-z])/\1\u\2/g; $arg; } sub lowertrivial { # this list should be sufficiently short, and constant, that there's no real # point reading it from a file ... local ($arg) = @_[1]; @words = ('a','an','and','by','for','from','in','of','the','to','with','de','la','pour','und'); foreach $word (@words) { $arg =~ s/\b$word\b/$word/gi; } $arg; } sub input_capwords # read capitalized words into an associative array { if (open(CAPSFILE, "words.cap")) { while () { chop; ($capwords{$_} = $_) =~ tr/a-z/A-Z/; } } } sub capwords # convert words that should be capitalized { local ($arg) = @_[1]; while (($key,$value) = each %capwords) { # $arg =~ s/\b$key\b/{$value}/gi; $value = $beginprotect . $value . $endprotect; $arg =~ s/\b$key\b/$value/gi; } $arg; } sub input_properwords # read proper words into an associative array { if (open(PROPERFILE, "words.proper")) { while () { chop; if (!/^$/) { ($properwords{$_}=&initcaps($_)) =~ s/-/ /g; } } } # while (($key,$value) = each %properwords) { print "$key=$value\n"; } } sub properwords # convert words that should be proper { local ($arg) = @_[1]; # note that any word *beginning* with a string in PROPERFILE will # be converted: this allows things like English/England, Africa/African, etc. # (but could probably be handled more cleverly, and might work wrong in # a few cases) while (($key,$value) = each %properwords) { $arg =~ s/\b$key(\w*)\b/{$value\1}/gi; } $arg; } sub input_speciesnames # read species names into an associative array { if (open(SPECIESFILE, "words.species")) { while () { chop; ($sppname = $_) =~ s/^([A-Za-z])([A-Za-z]+)/\U\1\E\L\2\E/; $sppname =~ s/-/ /; $sppname = $beginprotect . &makeital($sppname) . $endprotect; $speciesnames{$_}=$sppname; } } } sub speciesnames { # find and convert species names (genus name proper, species name lowercase); # surround the whole thing with brackets to protect it from BibTeX local ($arg) = @_[1]; while (($key,$value) = each %speciesnames) { $arg =~ s/\b$key\b/$value/gi; } $arg; } sub makeital # Italicize an argument { local($arg) = @_[1]; $arg = $beginital . $arg . $endital; $arg; } sub special # Add your own acronyms: this section should be left only # for those cases that can't get handled by converting to uppercase, # proper, etc. (and particularly, need to be searched for by regular expression) { local ($arg) = @_[1]; $arg =~ s/\bmc(-*)scf\b/MC\1SCF/g; $arg =~ s/\b([0-9]+)g\b/\1G/g; $arg =~ s/\bco2\b/{CO\$_2\$}/gi; $arg =~ s/ \.([1-9])\. /. \1: /g; $arg; } sub input_authornames # read species names into an associative array { if (open(AUTHORFILE, "words.author")) { while () { chop; ($name = $_) =~ tr/A-Za-z//cd; $authornames{$name}=$_; } } # while (($key,$value) = each %authornames) { # print("$key=$value\n"); } } sub specialauthor # hyphenated, etc. author names to fix { local ($arg) = @_[1]; if ($domcnames) { $arg =~ s/\bmc([a-z])/Mc\U\1\E/gi; # maybe too dangerous ... $arg =~ s/\bmac([a-z])/Mac\U\1\E/gi; }; while (($key,$value) = each %authornames) { $arg =~ s/\b$key\b/$value/gi; } $arg; } sub formulae # Chemical formulae -- impossible to get them all right { local ($arg) = @_[1]; # $arg =~ s/(\b[a-z]+[1-9]+[a-z0-9]*[+]?\b)/\U\1\E/gi; $arg =~ s/\b([a-z]+)([1-9]+)([a-z0-9]*[+]?)\b/{\U\1\E\$_\2\$\U\3\E}/gi; # next two lines cap. single letters preceded or followed by a hyphen $arg =~ s/\b([a-z])-([A-Za-z]+)\b/{\U\1\E}-\2/gi; $arg =~ s/\b([A-Za-z]+)-([a-z])\b/\1-{\U\2\E}/gi; $arg; } ####################################################################### sub getcitekey # automatically generate a citation key from the author names and date. # the rules are: # # if 1 author: # if length (surname) <= wholen surname+year # else first $abbren letters of surname+year # # if < ($joinall) authors # first $abbren letters of each surname+year # # if < ($firstlet) authors # first letter of each surname+year # # else # if length (surname) <= wholen surname+$etalstr+year # else first $abbren letters of surname+$etalstr+year { local (@arg) = @_; local ($key); $wholen=4; $abbrlen=4; $joinall=2; $firstlet=3; $etalstr='+'; # get last 2 characters of year if (length($arg[2]==4)) { $year=substr($arg[2],3,2); } else { if (length($arg[2]==2)) { $year=$arg[2]; } else { $year="??" }; }; # split up auth string into an array ($auth = $arg[1]) =~ s/, [A-Z]+//g; @autharr = split(/;/,$auth); if (($#autharr == 1) && ($wholen>=length($autharr[1]))) { $key = &initcaps(@autharr[1]); } else { if ($#autharr <= $joinall) { foreach $auth (@autharr) { $key .= substr(&initcaps($auth),1,$abbrlen); } } else { if ($#autharr <= $firstlet) { foreach $auth (@autharr) { $key .= substr($auth,1,1); } } else { $key = substr(&initcaps(@autharr[1]),1, (length(@autharr[1])<=$wholen ? length(@autharr[1]) : $abbrlen)) . $etalstr; } } } $key .= $year; $citelist{$key}++; if ($citelist{$key} > 1) { if ($citelist{$key}==2) { print STDERR "Multiple cite keys on",$key; } $key .= substr("abcdefghijklm",$citelist{$key},1); } $key; } sub strip { local ($val) = $_[1]; $val =~ s/....(.*)/\1/; $val; } sub header { write; $citekey = &getcitekey($entry{'author'},$entry{'year'}); $line = '@article{' . $citekey . ','; write; } sub author { @arr = split(/;/,$entry{'author'}); foreach $auth ( @arr) { ($sname,$fname) = split(/, /, $auth); $fname =~ s/\w/$&./g; $sname = do initcaps($sname); $sname = &formulae(&specialauthor($sname)); $auth = $sname . ', ' . $fname ; } $line = @outtags{'author'} . join(' and ', @arr) . $midsep; $line = do texsafety ($line); write; } sub title { $line = @outtags{'title'} . &firstupper($entry{'title'}) . $midsep; $line = &texsafety(&formulae(&capwords(&properwords(&speciesnames(&special($line)))))); write; } sub journal { $journal = &initcaps($entry{'journal'}); $line = @outtags{'journal'} . &capwords(&lowertrivial($journal)) . $midsep; write; } sub year { $line .= @outtags{'year'} . $entry{'year'} . $midsep; write; } sub volume { $line .= @outtags{'volume'} . $entry{'volume'} . $midsep; write; } sub number { if ($entry{'number'} ne '') { $line .= @outtags{'issue'} . $entry{'number'} . $midsep; write; } } # minor kluge for figuring out whether to use end- or mid-separator sub pages { $line .= @outtags{'pages'} . $entry{'pages'}; if ($lastentry eq 'pages') { $line .= $endsep } else {$line .= $midsep;} $line = do texsafety ($line); write; } sub doctype { if (($entry{'type'} ne 'ARTICLE') && ($entry{'type'} ne '')) { $line .= 'BIDS-type = "' . &lowercase($entry{'type'}); if ($lastentry eq 'type') {$line .= $endsep } else {$line .= $midsep}; write; } } sub bids_rf { if ($entry{'bids_rf'} ne '') { $line .= 'BIDS-rf = "' . &lowercase($entry{'bids_rf'}); if ($lastentry eq 'bids_rf') {$line .= $endsep } else {$line .= $midsep}; write; } } sub keywords { if ($entry{'keywords'} ne '') { $line .= 'keywords = "' . &lowercase($entry{'keywords'}); if ($lastentry eq 'keywords') {$line .= $endsep } else {$line .= $midsep}; write; } } sub terminator { $line = '}'; write; } sub clear { $entry{'author'} = ''; $entry{'title'} = ''; $entry{'journal'} = ''; $entry{'volume'} = ''; $entry{'year'} = ''; $entry{'page'} = ''; $entry{'number'} = ''; $entry{'type'} = ''; $entry{'keywords'} = ''; $entry{'bids_rf'} = ''; $lastentry = 'pages'; } sub texsafety { local ($arg) = @_; $arg =~ s/&/\\&/g; $arg =~ s/%/\\%/g; $arg; } $outcount=0; &input_properwords; &input_capwords; &input_speciesnames; &input_authornames; while (<>) { chop; # strip record separator if (/^record/i) { $type = 'header'; $entry{$type} = $_; } if (/^ti- /i) { $type = 'title'; $entry{$type} = & strip ($_) ; } if (/^au- /i) { $type = 'author'; $entry{$type} = & strip ($_) ; } if (/^jn- /i) { $type = 'journal'; $entry{$type} = & strip ($_) ; } if (/^py- /i) { $type = 'year'; $entry{$type} = & strip ($_) ; } if (/^vo- /i) { $type = 'volume'; $entry{$type} = & strip ($_) ; } if (/^no- /i) { $type = 'number'; $entry{$type} = & strip ($_) ; } if (/^pg- /i) { $type = 'pages'; $entry{$type} = & strip ($_) ; } if (/^dt- /i) { $type = 'type'; $entry{$type} = & strip ($_); if ($entry{$type} ne 'ARTICLE') {$lastentry = 'type';} } if (/^wa- /i || /^kp- /i) { $type = 'keywords'; $lastentry = 'keywords'; $entry{$type} .= & strip ($_); } if (/^rf- /i) { $type = 'bids_rf'; $lastentry = 'bids_rf'; $entry{$type} = & strip ($_); } if (/^na- /i || /^cr- /i || /^ab- /i) { # ignore author addresses and cited references and abstracts $type='none'; } if (/^ / && $type ne 'none' ) { $entry{$type} .= " " . & strip($_); } if (/^ *$/ && $type ne 'none' && $type ne 'header' ) { & header; & author; & title; & journal; & year; & volume; & number; & pages; & doctype; & bids_rf; & keywords; & terminator; $type = 'none'; & clear; } }