-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzk_search.R
92 lines (77 loc) · 2.32 KB
/
zk_search.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
library(httr)
library(data.table)
library(mongolite)
library(jsonlite)
# SET UP ------------------------------------------------------------------
# Search terms: two spellings of the company name plus the identifier that
# is also queried as an OIB in the Mongo section
terms <- c("INA d.d.", "Ina - industrija nafte d.d.", "27759560625")

# Mongo projection: fields of each JSON document to return (1); `_id` is
# switched off (0)
projection_list <- list(
  `_id`                  = 0,
  lrUnitId               = 1,
  lrUnitNumber           = 1,
  mainBookId             = 1,
  mainBookName           = 1,
  cadastreMunicipalityId = 1,
  institutionName        = 1
)

# Serialised once; auto_unbox keeps the scalar 0/1 values from becoming arrays
projection_json <- toJSON(projection_list, auto_unbox = TRUE)
# DATA API SEARCH ---------------------------------------------------------
# Query the Data API once per search term and collect the hits.
# Each element of zk_l is a data.table of hits with the originating term in
# the first column, or NULL when the term produced no hits.
zk_l <- lapply(terms, function(x) {
  p <- GET(
    "http://dac.hr/api/v1/query",
    query = list(
      q = x,
      history = "true",
      limit = 20000,
      skip = 0
    ),
    add_headers(`X-DataApi-Key` = Sys.getenv("TOKEN"))
  )
  # Abort on 4xx/5xx (e.g. missing or invalid TOKEN) instead of trying to
  # bind an error payload as if it were search hits
  stop_for_status(p)
  hits <- content(p)$hits
  # cbind.data.frame() cannot recycle `term` against zero rows, so a term
  # without hits is returned as NULL (rbindlist() skips NULL items)
  if (length(hits) == 0) {
    return(NULL)
  }
  # fill = TRUE binds by field name and tolerates hits with differing fields
  res <- rbindlist(hits, fill = TRUE)
  as.data.table(cbind.data.frame(term = x, res))
})

# Number of hits per term (0 where nothing was found)
setNames(vapply(zk_l, NROW, integer(1)), terms)

# Combine all terms and drop exact duplicate rows
zkdt <- rbindlist(zk_l, fill = TRUE)
zkdt <- unique(zkdt)

# Specific filtering: keep only hits whose `type` is "zk"
# NOTE(review): this step was labelled "1) Trezor invest", which looks like a
# leftover from another search script - confirm the filter is intended here
if (nrow(zkdt) > 0) {
  zkdt <- zkdt[type == "zk"]
}
# MONGO SEARCH ------------------------------------------------------------
# Exact search by OIB: for every OIB fetch the documents in which it appears
# as an owner's taxNumber, returning only the projected fields.
oibs <- c("27759560625", "15538072333")
con <- mongo(collection = "zkjson", url = Sys.getenv("MONGO_URL"))
mongo_oib <- tryCatch(
  lapply(oibs, function(o) {
    q <- sprintf('{"ownershipSheetB.lrUnitShares.lrOwners.taxNumber": "%s"}', o)
    result <- con$find(q, fields = projection_json)
    # An OIB without matching documents yields a zero-row result, which
    # cbind.data.frame() cannot combine with the length-one `term`; return
    # NULL instead (rbindlist() skips NULL items)
    if (NROW(result) == 0) {
      return(NULL)
    }
    as.data.table(cbind.data.frame(term = o, result))
  }),
  # Close the connection even when one of the queries fails
  finally = con$disconnect()
)
mongo_oib <- rbindlist(mongo_oib, fill = TRUE)
if (nrow(mongo_oib) > 0) {
  # Flatten every column with unlist()
  # NOTE(review): presumably some columns come back as lists; this assumes
  # each cell holds exactly one value - confirm against the data
  mongo_oib <- mongo_oib[, lapply(.SD, unlist)]
  # Distinct institutions among the matched documents
  mongo_oib[, unique(institutionName)]
}
# Standard Atlas search
# Aggregation pipeline: a text $search for all `terms` over the two owner-name
# paths using the "standard" index, followed by a $limit of 50000
pipeline_list <- list(
  list(
    `$search` = list(
      index = "standard",
      text = list(
        query = terms,
        path = c(
          "ownershipSheetB.lrUnitShares.lrOwners.name",
          "ownershipSheetB.lrUnitShares.ubSharesAndEntries.lrOwners.name"
        )
      )
    )
  ),
  list(`$limit` = 50000)
)
pipeline_json <- toJSON(pipeline_list, auto_unbox = TRUE)

# Run the pipeline on the zkjson collection, then close the connection
con <- mongo(url = Sys.getenv("MONGO_URL"), collection = "zkjson")
result <- con$aggregate(pipeline_json)
con$disconnect()

# ANALYSE RESULTS ---------------------------------------------------------
# Dimensions of the Atlas search result
dim(result)

# Save: only the Data API hits (zkdt) are written to disk here
fwrite(zkdt, "INA_podaci.csv")