Skip to content

Commit 07a898c

Browse files
committed
feat: update js rendering
1 parent 2a02cb9 commit 07a898c

File tree

5 files changed

+27
-4
lines changed

5 files changed

+27
-4
lines changed

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,16 +439,18 @@ def new_id(prefix: str) -> str:
439439
return {"status": "mock", "url": url, "method": method, "kwargs": kwargs}
440440

441441
async def markdownify(
442-
self, website_url: str, headers: Optional[dict[str, str]] = None, stealth: bool = False
442+
self, website_url: str, headers: Optional[dict[str, str]] = None, mock: bool = False, render_heavy_js: bool = False, stealth: bool = False
443443
):
444444
"""Send a markdownify request"""
445445
logger.info(f"🔍 Starting markdownify request for {website_url}")
446446
if headers:
447447
logger.debug("🔧 Using custom headers")
448448
if stealth:
449449
logger.debug("🥷 Stealth mode enabled")
450+
if render_heavy_js:
451+
logger.debug("⚡ Heavy JavaScript rendering enabled")
450452

451-
request = MarkdownifyRequest(website_url=website_url, headers=headers, stealth=stealth)
453+
request = MarkdownifyRequest(website_url=website_url, headers=headers, mock=mock, render_heavy_js=render_heavy_js, stealth=stealth)
452454
logger.debug("✅ Request validation passed")
453455

454456
result = await self._make_request(
@@ -741,6 +743,8 @@ async def crawl(
741743
same_domain_only: bool = True,
742744
batch_size: Optional[int] = None,
743745
sitemap: bool = False,
746+
headers: Optional[dict[str, str]] = None,
747+
render_heavy_js: bool = False,
744748
stealth: bool = False,
745749
):
746750
"""Send a crawl request with support for both AI extraction and
@@ -764,6 +768,8 @@ async def crawl(
764768
logger.debug(f"🗺️ Use sitemap: {sitemap}")
765769
if stealth:
766770
logger.debug("🥷 Stealth mode enabled")
771+
if render_heavy_js:
772+
logger.debug("⚡ Heavy JavaScript rendering enabled")
767773
if batch_size is not None:
768774
logger.debug(f"📦 Batch size: {batch_size}")
769775

@@ -776,6 +782,7 @@ async def crawl(
776782
"max_pages": max_pages,
777783
"same_domain_only": same_domain_only,
778784
"sitemap": sitemap,
785+
"render_heavy_js": render_heavy_js,
779786
"stealth": stealth,
780787
}
781788

@@ -786,6 +793,8 @@ async def crawl(
786793
request_data["data_schema"] = data_schema
787794
if batch_size is not None:
788795
request_data["batch_size"] = batch_size
796+
if headers is not None:
797+
request_data["headers"] = headers
789798

790799
request = CrawlRequest(**request_data)
791800
logger.debug("✅ Request validation passed")

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,15 +452,17 @@ def new_id(prefix: str) -> str:
452452
# Generic fallback
453453
return {"status": "mock", "url": url, "method": method, "kwargs": kwargs}
454454

455-
def markdownify(self, website_url: str, headers: Optional[dict[str, str]] = None, mock:bool=False, stealth:bool=False):
455+
def markdownify(self, website_url: str, headers: Optional[dict[str, str]] = None, mock: bool = False, render_heavy_js: bool = False, stealth: bool = False):
456456
"""Send a markdownify request"""
457457
logger.info(f"🔍 Starting markdownify request for {website_url}")
458458
if headers:
459459
logger.debug("🔧 Using custom headers")
460460
if stealth:
461461
logger.debug("🥷 Stealth mode enabled")
462+
if render_heavy_js:
463+
logger.debug("⚡ Heavy JavaScript rendering enabled")
462464

463-
request = MarkdownifyRequest(website_url=website_url, headers=headers, mock=mock, stealth=stealth)
465+
request = MarkdownifyRequest(website_url=website_url, headers=headers, mock=mock, render_heavy_js=render_heavy_js, stealth=stealth)
464466
logger.debug("✅ Request validation passed")
465467

466468
result = self._make_request(
@@ -749,6 +751,8 @@ def crawl(
749751
same_domain_only: bool = True,
750752
batch_size: Optional[int] = None,
751753
sitemap: bool = False,
754+
headers: Optional[dict[str, str]] = None,
755+
render_heavy_js: bool = False,
752756
stealth: bool = False,
753757
):
754758
"""Send a crawl request with support for both AI extraction and
@@ -772,6 +776,8 @@ def crawl(
772776
logger.debug(f"🗺️ Use sitemap: {sitemap}")
773777
if stealth:
774778
logger.debug("🥷 Stealth mode enabled")
779+
if render_heavy_js:
780+
logger.debug("⚡ Heavy JavaScript rendering enabled")
775781
if batch_size is not None:
776782
logger.debug(f"📦 Batch size: {batch_size}")
777783

@@ -784,6 +790,7 @@ def crawl(
784790
"max_pages": max_pages,
785791
"same_domain_only": same_domain_only,
786792
"sitemap": sitemap,
793+
"render_heavy_js": render_heavy_js,
787794
"stealth": stealth,
788795
}
789796

@@ -794,6 +801,8 @@ def crawl(
794801
request_data["data_schema"] = data_schema
795802
if batch_size is not None:
796803
request_data["batch_size"] = batch_size
804+
if headers is not None:
805+
request_data["headers"] = headers
797806

798807
request = CrawlRequest(**request_data)
799808
logger.debug("✅ Request validation passed")

scrapegraph-py/scrapegraph_py/models/markdownify.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class MarkdownifyRequest(BaseModel):
2727
website_url: URL of the website to convert to markdown
2828
headers: Optional HTTP headers including cookies
2929
mock: Whether to use mock mode for testing
30+
render_heavy_js: Whether to render heavy JavaScript on the page
31+
stealth: Enable stealth mode to avoid bot detection
3032
3133
Example:
3234
>>> request = MarkdownifyRequest(website_url="https://example.com")
@@ -42,6 +44,7 @@ class MarkdownifyRequest(BaseModel):
4244
"and user agent",
4345
)
4446
mock: bool = Field(default=False, description="Whether to use mock mode for the request")
47+
render_heavy_js: bool = Field(default=False, description="Whether to render heavy JavaScript on the page")
4548
stealth: bool = Field(default=False, description="Enable stealth mode to avoid bot detection")
4649

4750
@model_validator(mode="after")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

0 commit comments

Comments
 (0)