-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAPI_Calls.py
160 lines (121 loc) · 4.91 KB
/
API_Calls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import requests
import time
import math
# VK API access token. It is sent as the `access_token` parameter of every
# request made in this file, so insert your own token here before running.
api_token = ""
# called in _get_user_data. Returns all pages that a user follows
def _get_user_subscriptions(user_id):
    """Return every page that the given user follows.

    The total number of followed pages is looked up first with
    ``_get_followed_pages_amount``; the list itself is then downloaded from
    ``groups.get`` in pages of up to 1000 entries, because the API returns at
    most 1000 results per request.

    Args:
        user_id: VK user id; it is sent as the ``user_id`` request parameter.

    Returns:
        list: the ``items`` of every downloaded page, concatenated in
        request order.

    Raises:
        KeyError: when a reply lacks the ``response`` (or ``items``/``count``)
            key. According to the original author's note this happens when
            the user has a private page or does not allow viewing their
            subscriptions; ``_get_user_data`` catches it and skips the user.
    """
    amount_of_followed_pages = _get_followed_pages_amount(user_id)

    # At least one page is always requested, even when the reported count is
    # 0, so users with few or no subscriptions still cost exactly one request.
    needed_requests = max(1, math.ceil(amount_of_followed_pages / 1000))

    subscriptions = []
    for page in range(needed_requests):
        # Only multi-page downloads are throttled: pause for one second
        # before requests 0, 3, 6, ... A single-page download is not delayed.
        if needed_requests > 1 and page % 3 == 0:
            time.sleep(1)
        params = (
            ('access_token', api_token),
            ('v', '5.65'),
            ('count', '1000'),
            ('offset', str(page * 1000)),
            ('user_id', str(user_id)),
        )
        response = requests.get('https://api.vk.com/method/groups.get', params=params)
        # No try/except here on purpose: a missing key propagates as the
        # original KeyError (which names the missing key) instead of being
        # replaced by a blank one.
        subscriptions += response.json()["response"]["items"]
    return subscriptions
# supporting function for _get_user_subscriptions()
# returns Int amount of followed pages for a user
def _get_followed_pages_amount(user_id):
    """Ask ``groups.get`` how many pages the given user follows.

    Helper for ``_get_user_subscriptions``. Returns the ``count`` field found
    inside the ``response`` object of the reply; a reply without those keys
    raises ``KeyError``.
    """
    query = {
        'access_token': api_token,
        'v': '5.65',
        'user_id': str(user_id),
    }
    payload = requests.get('https://api.vk.com/method/groups.get', params=query).json()
    return payload["response"]["count"]
# supporting function for _get_members_list()
# returns Int amount of page followers
def _get_subscriber_amount(community_name):
    """Ask ``groups.getById`` how many members a community has.

    Helper for ``_get_members_list``. The request asks for the
    ``members_count`` field and the function returns that field from the
    first entry of the reply's ``response`` list.
    """
    query = {
        'access_token': api_token,
        'v': '5.65',
        'group_id': str(community_name),
        'fields': 'members_count',
    }
    payload = requests.get('https://api.vk.com/method/groups.getById', params=query).json()
    first_group = payload["response"][0]
    return first_group["members_count"]
# gathers all subscriptions for all page members and puts them into a dictionary
def _get_user_data(members):
data_dict = {
}
for i in range(0, len(members)):
if i % 3 == 0:
time.sleep(1)
try:
subscriptions = ','.join(map(str, _get_user_subscriptions(members[i])))
# the key is the user id and the value are his subscriptions
data_dict[members[i]] = subscriptions
except KeyError:
pass
return data_dict
# Main function. Gets all members for a community and finds all followed pages for each member
def main_call(community_name):
    """Entry point: dump the subscriptions of a community's members to CSV.

    Fetches the member list of ``community_name``, gathers the subscriptions
    of each member, prints a progress message after each stage and finally
    writes the result to ``data.csv`` in the current working directory,
    overwriting any existing file.
    """
    community_members = _get_members_list(community_name)
    print("gathered members.")
    collected = _get_user_data(community_members)
    print("finished gathering all data.")
    # One line per user: the user id followed by that user's subscriptions.
    with open('data.csv', 'w') as out_file:
        out_file.writelines("%s,%s\n" % entry for entry in collected.items())
# We get 1000 results at a time from the VK API
# So we calculate the amount of needed requests and increase the query offset each time
def _get_members_list(community_name):
    """Download the complete member list of a community.

    The member total comes from ``_get_subscriber_amount``. Since the API
    hands out 1000 results per request, the list is fetched from
    ``groups.getMembers`` page by page, raising the offset by 1000 each time.
    Returns the ``items`` of all pages joined into one list.
    """
    total_members = _get_subscriber_amount(community_name)
    page_count = math.ceil(total_members / 1000)

    collected = []
    for page in range(page_count):
        # Throttle: pause for one second before requests 0, 3, 6, ...
        if page % 3 == 0:
            time.sleep(1)
        query = {
            'access_token': api_token,
            'v': '5.65',
            'count': '1000',
            'offset': str(page * 1000),
            'group_id': str(community_name),
        }
        reply = requests.get('https://api.vk.com/method/groups.getMembers', params=query)
        collected.extend(reply.json()["response"]["items"])
    return collected
# Function call + execution timer
# Run the scrape only when this file is executed as a script, so that
# importing the module does not trigger network requests or write data.csv.
if __name__ == "__main__":
    # perf_counter is a monotonic clock, so the measured duration is not
    # affected by system clock adjustments during a long run.
    t0 = time.perf_counter()
    # insert any community name here
    main_call("")
    t1 = time.perf_counter()
    total = t1 - t0
    print("All data collected and written to CSV file. It took: {0} seconds".format(total))