-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpapers2ads.py
111 lines (93 loc) · 3.41 KB
/
papers2ads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/python
""" This python script replaces entries in the bibtex library created by
Papers with those found in ADS (Astrophysics Data System) without modifying
the cite-key --- Daniel Perez-Becker - perezbecker.com
Papers is a wonderful tool to download and organize academic papers. Unfortunately,
the metadata it collects is often incomplete, so that article bibliographies have to be
curated by hand. Here is where papers2ads comes in. It will read the bibtex
file exported by Papers and fetch the corresponding references from ADS and
merge them with the human-readable citation keys (citekeys) generated by Papers. One
ends up with the best of both worlds: Complete and curated ADS bibtex metadata and
the ability to search and organize articles with Papers software.
*** To run papers2ads.py on your Papers library, simply export the library as a
bibtex file (e.g., test.bib) and then run:
- python papers2ads.py test.bib -
"""
from tempfile import mkstemp
from shutil import move
from os import remove, close
import urllib, string, fileinput, sys
def dreplace(file, pattern, subst):
    """Replace every occurrence of ``pattern`` with ``subst`` in ``file``, in place.

    The rewritten text is first written to a temporary file created with
    ``mkstemp``; once the whole input has been copied, the original file is
    removed and the temporary file is moved to the original path.

    Args:
        file: Path of the text file to rewrite.
        pattern: Literal substring to search for. Matching is done with
            ``str.replace`` one line at a time, so it is not a regular
            expression and cannot match across a line break.
        subst: Text written in place of each occurrence of ``pattern``.

    Raises:
        Whatever ``open``, ``remove`` or ``move`` raise (e.g. when ``file``
        does not exist). If the copy phase fails, the temporary file is
        deleted and ``file`` is left untouched.
    """
    # mkstemp hands back an already-open OS-level descriptor. The temp file
    # is re-opened by path below, so release the descriptor immediately
    # instead of holding it for the duration of the copy.
    fh, abs_path = mkstemp()
    close(fh)

    try:
        # Context managers guarantee both handles are closed even when a
        # read or write fails part-way through.
        with open(file) as old_file:
            with open(abs_path, 'w') as new_file:
                for line in old_file:
                    new_file.write(line.replace(pattern, subst))
    except Exception:
        # Nothing has touched the original yet: discard the scratch file
        # and let the caller see the error.
        remove(abs_path)
        raise

    # Only past this point is the original replaced. The scratch file is
    # deliberately NOT cleaned up if these steps fail, because after
    # remove() it holds the only copy of the data.
    remove(file)
    move(abs_path, file)
# ---------------------------------------------------------------------------
# Script section: read a Papers BibTeX export, fetch the matching ADS records
# and write them out under the Papers citekeys.
# ---------------------------------------------------------------------------
import os.path

try:
    # Python 2 -- the interpreter this script was written for.
    from urllib import quote, urlopen
except ImportError:
    # Python 3 moved both names into sub-modules.
    from urllib.parse import quote
    from urllib.request import urlopen

# ADS BibTeX query; the (escaped) bibcode is substituted for %s.
ADS_BIBTEX_QUERY = ('http://adsabs.harvard.edu/cgi-bin/nph-bib_query'
                    '?bibcode=%s&data_type=BIBTEX')

# Length of the "@article{" prefix stripped from a Papers citekey line when
# the ADS record turns out to be a different entry type.
ARTICLE_PREFIX_LEN = 9


def scan_papers_bibtex(lines):
    """Collect the per-entry fields this script needs from a Papers export.

    Args:
        lines: Iterable of text lines of the exported BibTeX file.

    Returns:
        A tuple ``(bibcodes, citekeys, sources)`` of three lists:

        * ``bibcodes`` -- characters 8..26 of every line containing
          ``"pmid = {"`` (the 19 characters following that marker when it
          starts the line),
        * ``citekeys`` -- every complete line containing ``"@article{"``,
        * ``sources``  -- characters 14..16 of every line containing
          ``"URL ="`` (``"ads"`` for ``URL = {http://adsabs...``).

        Each list is in file order; they are only index-aligned when every
        entry carries all three fields, which the caller must check.
    """
    bibcodes = []
    citekeys = []
    sources = []
    for line in lines:
        # Independent tests (not elif): each marker is matched on its own,
        # exactly as three separate passes over the file would do.
        if "pmid = {" in line:
            bibcodes.append(line[8:27])
        if "@article{" in line:
            citekeys.append(line)
        if "URL =" in line:
            sources.append(line[14:17])
    return bibcodes, citekeys, sources


def ads_output_path(bib_path):
    """Return the output path: the input file name prefixed with ``ads``.

    The prefix is applied to the file name only, so an input given with a
    directory component (``lib/test.bib``) yields ``lib/adstest.bib`` rather
    than the non-existent ``adslib/test.bib``.
    """
    directory, name = os.path.split(bib_path)
    return os.path.join(directory, 'ads' + name)


def ads_bibtex_url(bibcode):
    """Build the ADS BibTeX query URL for ``bibcode``.

    The bibcode is percent-encoded because it is placed inside a query
    string: a literal ``&`` (as in ``2005A&A...``) would otherwise be read
    as a parameter separator. Bibcodes made only of letters, digits and
    dots are left unchanged by the encoding.
    """
    return ADS_BIBTEX_QUERY % quote(bibcode)


def _as_text(raw):
    """Return ``raw`` as ``str``; a no-op on Python 2 where lines are ``str``."""
    if isinstance(raw, str):
        return raw
    # NOTE(review): assumes the ADS response is UTF-8 compatible; undecodable
    # bytes are replaced rather than aborting the run -- confirm encoding.
    return raw.decode('utf-8', 'replace')


def fetch_ads_bibtex(bibcode):
    """Download the ADS BibTeX response for ``bibcode`` as a list of lines.

    The final line of the response is dropped, as the original script did
    (it was described there as a trailing CR). An empty response yields an
    empty list instead of raising ``IndexError``.
    """
    response = urlopen(ads_bibtex_url(bibcode))
    try:
        raw_lines = response.readlines()
    finally:
        # Python 2's urlopen result is not a context manager, hence the
        # explicit try/finally to release the connection on errors too.
        response.close()
    lines = [_as_text(raw) for raw in raw_lines]
    if lines:
        lines.pop()
    return lines


def write_merged_entry(ads_lines, citekey_line, outfile, echo):
    """Write one ADS record to ``outfile`` under the Papers citekey.

    Args:
        ads_lines: Lines of the ADS response for one record.
        citekey_line: The ``@article{<citekey>,`` line from the Papers file.
        outfile: Writable text stream receiving the merged BibTeX.
        echo: Writable text stream receiving the progress echo.

    The entry header is the only line that is rewritten:

    * ``@ARTICLE``       -> the Papers citekey line, unchanged;
    * ``@BOOK``          -> ``@BOOK{`` + the Papers citekey;
    * ``@INPROCEEDINGS`` -> ``@INPROCEEDINGS{`` + the Papers citekey.

    Every other line is copied through and echoed. For ``@ARTICLE`` the echo
    shows the replaced ADS header prefixed with ``%`` followed by the new
    header; the other two header types are not echoed.
    """
    # Papers citekey without its "@article{" prefix, for non-article types.
    bare_key = citekey_line[ARTICLE_PREFIX_LEN:]
    for line in ads_lines:
        if "@ARTICLE" in line:
            echo.write("%" + line)
            echo.write(citekey_line)
            outfile.write(citekey_line)
        elif "@BOOK" in line:
            outfile.write("@BOOK{" + bare_key)
        elif "@INPROCEEDINGS" in line:
            outfile.write("@INPROCEEDINGS{" + bare_key)
        else:
            echo.write(line)
            outfile.write(line)


def main(argv=None):
    """Run the converter; returns a process exit status.

    Args:
        argv: Argument vector (program name first). Defaults to ``sys.argv``.

    Returns:
        0 on success, 1 when the Papers export is inconsistent, 2 when no
        input file was given.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.stderr.write("Usage: python papers2ads.py <papers-export.bib>\n")
        return 2

    bib_path = args[1]
    out_path = ads_output_path(bib_path)

    # raw_input on Python 2; Python 3 renamed it to input.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input

    # Single-argument print(...) parses and prints identically on 2 and 3.
    print(" ")
    print("*** Papers to ADS bibtex converter ***")
    print("Only articles downloaded with the Papers ADS search engine will be considered")
    print("Your input file is: %s" % bib_path)
    print("Your output file is: %s" % out_path)
    wait_for_enter('Press Enter to continue...')

    with open(bib_path) as bib_file:
        bibcodes, citekeys, sources = scan_papers_bibtex(bib_file)

    # The three lists are matched up by position, so they must line up
    # one-to-one; otherwise citekeys would be attached to the wrong records.
    if not (len(bibcodes) == len(citekeys) == len(sources)):
        print("ERROR!!!!")
        print("Please clean up your Papers archive and try again. Specifically make sure that all articles in your archive have:")
        print("(1) A citekey")
        print("(2) An ID (bibcode)")
        print("(3) A download url")
        return 1

    total = len(bibcodes)
    with open(out_path, 'w') as outfile:
        entries = zip(bibcodes, citekeys, sources)
        for index, (bibcode, citekey_line, source) in enumerate(entries):
            # Entries that did not come from ADS are skipped silently.
            if source != 'ads':
                continue
            percent = int(100.0 * index / total)
            print("Fetching %s  from %s   %d %% done" % (bibcode, source, percent))
            ads_lines = fetch_ads_bibtex(bibcode)
            write_merged_entry(ads_lines, citekey_line, outfile, sys.stdout)

    # Blank out the banner lines ADS puts in front of every record.
    dreplace(out_path, "Query Results from the ADS Database", " ")
    dreplace(out_path, "Retrieved 1 abstracts, starting with number 1. Total number selected: 1.", " ")
    return 0


if __name__ == "__main__":
    sys.exit(main())