# keywords.py
# Forked from ourresearch/openalex-text-api.
import json
import os
from marshmallow import Schema, fields
import requests
from topics import get_topic_predictions
from utils import format_score
def get_keywords_predictions(title, abstract, *, timeout=30):
    """Request keyword predictions for a work from the keyword-tagging endpoint.

    Topic predictions are computed first with ``get_topic_predictions`` and
    their ``topic_id`` values are sent along with the title and abstract.

    Args:
        title: Title of the work.
        abstract: Abstract of the work; sent under the
            ``abstract_inverted_index`` key with ``inverted`` set to ``False``.
        timeout: Seconds to wait for the endpoint before ``requests`` raises
            a timeout error. Without a bound, a stalled connection would
            block the caller indefinitely.

    Returns:
        The first element of the JSON response when the endpoint answers with
        HTTP 200, otherwise an empty list. An empty list is also returned
        when the 200 response body is itself empty.
    """
    api_url = "https://qapir74yac.execute-api.us-east-1.amazonaws.com/api/"
    headers = {"X-API-Key": os.getenv("SAGEMAKER_API_KEY")}

    topic_predictions = get_topic_predictions(title, abstract)
    input_data = {
        "title": title,
        "abstract_inverted_index": abstract,
        "inverted": False,
        "topics": [topic["topic_id"] for topic in topic_predictions],
    }

    # NOTE(review): passing an already-serialized string to ``json=`` makes
    # requests encode it a second time, so the body is a JSON string rather
    # than a JSON array. Kept as-is because the endpoint's expected payload
    # is not visible here -- confirm before changing.
    r = requests.post(
        api_url,
        json=json.dumps([input_data]),
        headers=headers,
        timeout=timeout,
    )
    if r.status_code != 200:
        print(f"Error tagging keywords: {r.status_code}")
        return []

    response_json = r.json()
    if not response_json:
        # A 200 with an empty body has no first element to return; fall back
        # to the same empty result used for failed requests.
        return []
    return response_json[0]
def get_keywords_from_api(keyword_ids, *, timeout=30):
    """Fetch keyword records from the OpenAlex API for the given IDs.

    Args:
        keyword_ids: Iterable of keyword ID strings. They are joined with
            ``|`` into a single ``id`` filter.
        timeout: Seconds to wait for the API before ``requests`` raises a
            timeout error, so a stalled connection cannot block forever.

    Returns:
        The ``results`` list from the API response, or an empty list when no
        IDs are given (in which case no request is made).
    """
    keyword_ids = list(keyword_ids)
    if not keyword_ids:
        # With no IDs the URL would carry a filter with no value ("id:");
        # there is nothing to look up, so skip the request entirely.
        return []

    id_filter = "|".join(keyword_ids)
    r = requests.get(
        f"https://api.openalex.org/keywords?filter=id:{id_filter}",
        timeout=timeout,
    )
    return r.json()["results"]
def format_keywords(keyword_predictions, keywords_from_api):
    """Return API keyword records in prediction order, each tagged with a score.

    For every prediction, the first record in ``keywords_from_api`` whose
    ``id`` equals ``https://openalex.org/keywords/<keyword_id>`` is selected.
    That record's ``score`` key is set in place to
    ``format_score(prediction["score"])`` and the record is appended to the
    result. Predictions with no matching record are skipped.
    """
    ranked = []
    for prediction in keyword_predictions:
        # First matching record wins, mirroring a scan that stops on a hit.
        match = next(
            (
                record
                for record in keywords_from_api
                if record["id"]
                == f"https://openalex.org/keywords/{prediction['keyword_id']}"
            ),
            None,
        )
        if match is None:
            continue
        match["score"] = format_score(prediction["score"])
        ranked.append(match)
    return ranked
class KeywordsSchema(Schema):
    """Schema for one keyword entry: its id, display name and score."""

    class Meta:
        # marshmallow ``ordered`` option, enabled for this schema.
        ordered = True

    id = fields.String()
    display_name = fields.String()
    score = fields.Float()
class MetaSchema(Schema):
    """Schema for response metadata; carries a single integer ``count``."""

    class Meta:
        # marshmallow ``ordered`` option, enabled for this schema.
        ordered = True

    count = fields.Integer()
class KeywordsMessageSchema(Schema):
    """Schema for the full message: a ``meta`` object plus a list of keywords."""

    class Meta:
        # marshmallow ``ordered`` option, enabled for this schema.
        ordered = True

    meta = fields.Nested(MetaSchema)
    keywords = fields.Nested(KeywordsSchema, many=True)