# papers2ads.py

#!/usr/bin/python
""" This python script replaces entries in the bibtex library created by 
Papers with those found in ADS (Astrophysics Data System) without modifying 
the cite-key --- Daniel Perez-Becker - perezbecker.com 

Papers is a wonderful tool to download and organize academic papers. Unfortunately,
the metadata it collects is often incomplete, so that article bibliographies have to be
curated by hand. This is where papers2ads comes in. It will read the bibtex
file exported by Papers, fetch the corresponding references from ADS, and
merge them with the human-readable citation keys (citekeys) generated by Papers. One
ends up with the best of both worlds: complete and curated ADS bibtex metadata and
the ability to search and organize articles with the Papers software.

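*** To run papers2ads.py on your Papers library, simply export the library as a
bibtex file (e.g., test.bib) and then run:
- python papers2ads.py test.bib -
The result is written to a new file named after the input with an 'ads'
prefix (e.g., adstest.bib).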

"""
import os
import urllib, string, fileinput, sys
from os import remove, close
from shutil import move
from tempfile import mkstemp

def dreplace(file, pattern, subst):
    """Replace every occurrence of *pattern* with *subst* in a file, in place.

    The rewritten content is first written to a temporary file; the original
    is then removed and the temporary file is moved to the original path.

    Args:
        file: Path of the text file to rewrite.
        pattern: Literal substring to look for (plain ``str.replace``
            semantics, not a regular expression).
        subst: Text that replaces each occurrence of ``pattern``.

    Raises:
        IOError / OSError: If ``file`` cannot be read, or the temporary file
            cannot be written or moved. The temporary file is removed before
            the error propagates, and ``file`` is left untouched.
    """
    # mkstemp hands back an already-open OS-level descriptor. Only the path
    # is needed, so release the descriptor right away instead of keeping two
    # handles open on the same file.
    fh, abs_path = mkstemp()
    close(fh)
    try:
        with open(file) as old_file:
            with open(abs_path, 'w') as new_file:
                for line in old_file:
                    new_file.write(line.replace(pattern, subst))
    except BaseException:
        # Do not leave a stray temp file behind when reading/writing fails.
        remove(abs_path)
        raise
    # Remove original file, then move the rewritten copy into its place.
    remove(file)
    move(abs_path, file)


print " "
print "***   Papers to ADS bibtex converter   ***" 
print "Only articles downloaded with the Papers ADS search engine will be considered" 
print "Your input file is:",sys.argv[1] 
print "Your output file is:", 'ads'+sys.argv[1]

raw_input('Press Enter to continue...')

file=sys.argv[1]

bibcode=list()
citekey=list()
auxurl=list()

for line in open(file):
  if "pmid = {" in line:
    #print line
    bibcode.append(line[8:27])
  
for line in open(file):
  if "@article{" in line:
    #print line
    citekey.append(line)

for line in open(file):
  if "URL =" in line:
    #print line
    auxurl.append(line[14:17])

if(len(bibcode)==len(citekey) and len(bibcode)==len(auxurl)):

  outfile=open('ads'+sys.argv[1], 'w')
  for j in range(0,len(bibcode)):
    if(auxurl[j]=='ads'):
      y=bibcode[j]
      print "Fetching",bibcode[j]," from",auxurl[j]," ",int(100.*float(j)/float(len(bibcode))),"% done"
    #raw_input('Check and press Enter...')
      url='http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode='+bibcode[j]+'&data_type=BIBTEX'
    #print url
      ref=urllib.urlopen(url)
      text=ref.readlines()
    # remove CR from list
      text.pop()
      for i in text:
        if "@ARTICLE" in i:
          print "%"+ i,
          print citekey[j],
        #outfile.write("%"+i)
          outfile.write(citekey[j])
        elif "@BOOK" in i:
          outfile.write("@BOOK{"+citekey[j][9:])
        elif "@INPROCEEDINGS" in i:
          outfile.write("@INPROCEEDINGS{"+citekey[j][9:])
        else:
          print i,
          outfile.write(i)
        
      ref.close()

  outfile.close()
  myfile='ads'+sys.argv[1]
  dreplace(myfile,"Query Results from the ADS Database"," ")
  dreplace(myfile,"Retrieved 1 abstracts, starting with number 1.  Total number selected: 1."," ")
else:
  print "ERROR!!!!"
  print "Please clean up your Papers archive and try again. Specifically make sure that all articles in your archive have:"
  print "(1) A citekey"
  print "(2) An ID (bibcode)"
  print "(3) A download url"