Skip to content

Commit

Permalink
remove regexp
Browse files Browse the repository at this point in the history
back to "/dp/" from "/(d|g)p/" seems to solve the ego issue in digitalmethodsinitiative#2
  • Loading branch information
iosonosempreio committed Dec 14, 2021
1 parent 7a1e740 commit b60477f
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion carousels.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
own_img = own_img ? own_img.getAttribute('src') : null;

let item_metadata = {
- asin: document.location.href.split(/(d|g)p/)[1].split('/')[0],
+ asin: document.location.href.split('/dp/')[1].split('/')[0],
label: own_title.innerText,
author: null,
rank: 0,
Expand Down
4 changes: 2 additions & 2 deletions scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def generate_recommendation_network(seeds, depth=0, prefix="", max_carousel_item
while current_depth <= depth:
# we use ASINs as unique identifiers - they can be found at a
# predictable place in the product page URL
- seed_asins |= set([re.split("/(d|g)p/", seed)[1].split("/")[0] for seed in seeds])
+ seed_asins |= set([seed.split("/dp/")[1].split("/")[0] for seed in seeds])
if not initial_asins:
initial_asins = seed_asins

Expand All @@ -130,7 +130,7 @@ def generate_recommendation_network(seeds, depth=0, prefix="", max_carousel_item
print("- no results, link may be invalid")
continue

- seed_asin = re.split("/(d|g)p/", seed)[1].split("/")[0]
+ seed_asin = seed.split("/dp/")[1].split("/")[0]

# process recommendations
for list_title, list_items in recommendations.items():
Expand Down

0 comments on commit b60477f

Please sign in to comment.