diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 0c362a8..8431e3e 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -6,15 +6,11 @@ name: Upload Python Package
on:
release:
types: [published]
- paths:
- - 'scrapegraph-py/**'
jobs:
deploy:
runs-on: ubuntu-latest
- # Only run if scrapegraph-py has changes
- if: contains(github.event.release.body, 'scrapegraph-py/')
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index cb2e3a6..2f7a203 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,15 +4,11 @@ on:
branches:
- main
- pre/*
- paths:
- - 'scrapegraph-py/**'
jobs:
build:
name: Build
runs-on: ubuntu-latest
- # Only run if scrapegraph-py has changes
- if: contains(github.event.head_commit.modified, 'scrapegraph-py/') || contains(github.event.head_commit.added, 'scrapegraph-py/') || contains(github.event.head_commit.removed, 'scrapegraph-py/')
steps:
- name: Install git
run: |
diff --git a/scrapegraph-js/README.md b/scrapegraph-js/README.md
index 9ed7150..fe68c7e 100644
--- a/scrapegraph-js/README.md
+++ b/scrapegraph-js/README.md
@@ -35,6 +35,7 @@ yarn add scrapegraph-js
```javascript
import { smartScraper } from 'scrapegraph-js';
+import 'dotenv/config';
// Initialize variables
const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable
@@ -105,12 +106,43 @@ const schema = z.object({
})();
```
+### Scraping local HTML
+
+Extract structured data from local HTML content.
+
+```javascript
+import { localScraper } from 'scrapegraph-js';
+
+const apiKey = 'your_api_key';
+const prompt = 'What does the company do?';
+
+const websiteHtml = `
+  <html>
+    <body>
+      <h1>Company Name</h1>
+      <p>We are a technology company focused on AI solutions.</p>
+    </body>
+  </html>`;
+(async () => {
+ try {
+ const response = await localScraper(apiKey, websiteHtml, prompt);
+ console.log(response);
+ } catch (error) {
+ console.error(error);
+ }
+})();
+```
+
### Markdownify
+
Converts a webpage into clean, well-structured markdown format.
+
```javascript
import { smartScraper } from 'scrapegraph-js';
-const apiKey = "your_api_key";
+const apiKey = 'your_api_key';
const url = 'https://scrapegraphai.com/';
(async () => {
@@ -123,7 +155,6 @@ const url = 'https://scrapegraphai.com/';
})();
```
-
### Checking API Credits
```javascript
diff --git a/scrapegraph-js/examples/localScraper_example.js b/scrapegraph-js/examples/localScraper_example.js
new file mode 100644
index 0000000..95552e5
--- /dev/null
+++ b/scrapegraph-js/examples/localScraper_example.js
@@ -0,0 +1,33 @@
+import { localScraper, getLocalScraperRequest } from 'scrapegraph-js';
+import 'dotenv/config';
+
+// localScraper function example
+const apiKey = process.env.SGAI_APIKEY;
+const prompt = 'What does the company do?';
+
+const websiteHtml = `
+  <html>
+    <body>
+      <h1>Company Name</h1>
+      <p>We are a technology company focused on AI solutions.</p>
+    </body>
+  </html>`;
+
+try {
+ const response = await localScraper(apiKey, websiteHtml, prompt);
+ console.log(response);
+} catch (error) {
+ console.error(error);
+}
+
+// getLocalScraperRequest function example
+const requestId = 'b8d97545-9ed3-441b-a01f-4b661b4f0b4c';
+
+try {
+ const response = await getLocalScraperRequest(apiKey, requestId);
+ console.log(response);
+} catch (error) {
+ console.log(error);
+}
diff --git a/scrapegraph-js/examples/schema_localScraper_example.js b/scrapegraph-js/examples/schema_localScraper_example.js
new file mode 100644
index 0000000..1de6344
--- /dev/null
+++ b/scrapegraph-js/examples/schema_localScraper_example.js
@@ -0,0 +1,28 @@
+import { localScraper } from 'scrapegraph-js';
+import { z } from 'zod';
+import 'dotenv/config';
+
+// localScraper with output schema example
+const apiKey = process.env.SGAI_APIKEY;
+const prompt = 'extract contact';
+
+const websiteHtml = `
+  <html>
+    <body>
+      <h1>Company Name</h1>
+      <p>We are a technology company focused on AI solutions. Contact us at info@company.com.</p>
+    </body>
+  </html>`;
+
+const schema = z.object({
+ contact: z.string().describe('email contact'),
+});
+
+try {
+ const response = await localScraper(apiKey, websiteHtml, prompt, schema);
+ console.log(response);
+} catch (error) {
+ console.error(error);
+}
diff --git a/scrapegraph-js/index.js b/scrapegraph-js/index.js
index ca4dbb7..1e4c1c5 100644
--- a/scrapegraph-js/index.js
+++ b/scrapegraph-js/index.js
@@ -1,4 +1,5 @@
export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js';
export { markdownify, getMarkdownifyRequest } from './src/markdownify.js';
+export { localScraper, getLocalScraperRequest } from './src/localScraper.js';
export { getCredits } from './src/credits.js';
export { sendFeedback } from './src/feedback.js';
diff --git a/scrapegraph-js/src/localScraper.js b/scrapegraph-js/src/localScraper.js
new file mode 100644
index 0000000..412aac4
--- /dev/null
+++ b/scrapegraph-js/src/localScraper.js
@@ -0,0 +1,66 @@
+import axios from 'axios';
+import handleError from './utils/handleError.js';
+import { ZodType } from 'zod';
+import { zodToJsonSchema } from 'zod-to-json-schema';
+
+/**
+ * Extract structured data from local HTML content using ScrapeGraph AI.
+ *
+ * @param {string} apiKey - The API key for ScrapeGraph AI.
+ * @param {string} websiteHtml - HTML content as a string from the local web page to scrape.
+ * @param {string} prompt - A natural language description of the data to extract.
+ * @param {Object} [schema] - (Optional) Schema object defining the structure of the desired output.
+ * @returns {Promise<Object>} The extracted data, structured to match the provided schema.
+ * @throws {Error} If an HTTP error or validation issue occurs.
+ */
+export async function localScraper(apiKey, websiteHtml, prompt, schema = null) {
+ const endpoint = 'https://api.scrapegraphai.com/v1/localscraper';
+ const headers = {
+ 'accept': 'application/json',
+ 'SGAI-APIKEY': apiKey,
+ 'Content-Type': 'application/json',
+ };
+
+ const payload = {
+ website_html: websiteHtml,
+ user_prompt: prompt,
+ };
+
+ if (schema) {
+ if (schema instanceof ZodType) {
+ payload.output_schema = zodToJsonSchema(schema);
+ } else {
+ throw new Error('The schema must be an instance of a valid Zod schema');
+ }
+ }
+
+ try {
+ const response = await axios.post(endpoint, payload, { headers });
+ return response.data;
+ } catch (error) {
+ handleError(error);
+ }
+}
+
+/**
+ * Retrieve the status or result of a localScraper request, including results of previous requests.
+ *
+ * @param {string} apiKey - The API key for ScrapeGraph AI.
+ * @param {string} requestId - The unique ID associated with the localScraper request.
+ * @returns {Promise<Object>} The status or result of the scraping request.
+ * @throws {Error} If an error occurs while retrieving the request details.
+ */
+export async function getLocalScraperRequest(apiKey, requestId) {
+ const endpoint = 'https://api.scrapegraphai.com/v1/localscraper/' + requestId;
+ const headers = {
+ 'accept': 'application/json',
+ 'SGAI-APIKEY': apiKey,
+ };
+
+ try {
+ const response = await axios.get(endpoint, { headers });
+ return response.data;
+ } catch (error) {
+ handleError(error);
+ }
+}
diff --git a/scrapegraph-js/src/markdownify.js b/scrapegraph-js/src/markdownify.js
index 5a1d4e5..14ae0e3 100644
--- a/scrapegraph-js/src/markdownify.js
+++ b/scrapegraph-js/src/markdownify.js
@@ -9,7 +9,7 @@ import handleError from './utils/handleError.js';
* @returns {Promise} A promise that resolves to the markdown representation of the webpage.
* @throws {Error} Throws an error if the HTTP request fails.
*/
-export async function markdownify(apiKey, url){
+export async function markdownify(apiKey, url) {
const endpoint = 'https://api.scrapegraphai.com/v1/markdownify';
const headers = {
'accept': 'application/json',
@@ -24,7 +24,7 @@ export async function markdownify(apiKey, url){
const response = await axios.post(endpoint, payload, { headers });
return response.data;
} catch (error) {
- handleError(error)
+ handleError(error);
}
}
@@ -36,7 +36,7 @@ export async function markdownify(apiKey, url){
* @returns {Promise} A promise that resolves with details about the status or outcome of the specified request.
* @throws {Error} Throws an error if the HTTP request fails.
*/
-export async function getMarkdownifyRequest(apiKey, requestId){
+export async function getMarkdownifyRequest(apiKey, requestId) {
const endpoint = 'https://api.scrapegraphai.com/v1/markdownify/' + requestId;
const headers = {
'accept': 'application/json',
@@ -47,6 +47,6 @@ export async function getMarkdownifyRequest(apiKey, requestId){
const response = await axios.get(endpoint, { headers });
return response.data;
} catch (error) {
- handleError(error)
+ handleError(error);
}
-}
\ No newline at end of file
+}
diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md
index 26cc6f1..6654cac 100644
--- a/scrapegraph-py/CHANGELOG.md
+++ b/scrapegraph-py/CHANGELOG.md
@@ -1,3 +1,48 @@
+## [1.9.0-beta.5](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.4...v1.9.0-beta.5) (2025-01-03)
+
+
+### Bug Fixes
+
+* updated hatchling version ([740933a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/740933aff79a5873e6d1c633afcedb674d1f4cf0))
+
+## [1.9.0-beta.4](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.3...v1.9.0-beta.4) (2025-01-03)
+
+
+### Bug Fixes
+
+* improve api desc ([62243f8](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/62243f84384ae238c0bd0c48abc76a6b99376c74))
+
+## [1.9.0-beta.3](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.2...v1.9.0-beta.3) (2024-12-10)
+
+
+### Bug Fixes
+
+* come back to py 3.10 ([26d3a75](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/26d3a75ed973590e21d55c985bf71f3905a3ac0e))
+
+## [1.9.0-beta.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.1...v1.9.0-beta.2) (2024-12-10)
+
+
+### Bug Fixes
+
+* add new python compatibility ([77b67f6](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/77b67f646d75abd3a558b40cb31c52c12cc7182e))
+
+## [1.9.0-beta.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.8.0...v1.9.0-beta.1) (2024-12-10)
+
+
+### Features
+
+* add localScraper functionality ([8701eb2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/8701eb2ca7f108b922eb1617c850a58c0f88f8f9))
+* revert to old release ([d88a3ac](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/d88a3ac6969a0abdf1f6b8eccde9ad8284d41d20))
+
+
+### Bug Fixes
+
+* .toml file ([e719881](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/e7198817d8dac802361ab84bc4d5d961fb926767))
+* add revert ([09257e0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/09257e08246d8aee96b3944ac14cc14b88e5f818))
+* minor fix version ([0b972c6](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/0b972c69a9ea843d8ec89327f35c287b0d7a2bb4))
+* pyproject ([2440f7f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/2440f7f2a5179c6e3a86faf4eefa1d5edf7524c8))
+* python version ([24366b0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/24366b08eefe0789da9a0ccafb8058e8744ee58b))
+
## [1.8.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.7.0...v1.8.0) (2024-12-08)
diff --git a/scrapegraph-py/pyproject.toml b/scrapegraph-py/pyproject.toml
index da5ef04..f885b92 100644
--- a/scrapegraph-py/pyproject.toml
+++ b/scrapegraph-py/pyproject.toml
@@ -92,7 +92,7 @@ disallow_untyped_calls = true
ignore_missing_imports = true
[build-system]
-requires = ["hatchling"]
+requires = ["hatchling==1.26.3"]
build-backend = "hatchling.build"
[tool.poe.tasks]
diff --git a/scrapegraph-py/scrapegraph_py/utils/helpers.py b/scrapegraph-py/scrapegraph_py/utils/helpers.py
index b5e3c28..7e5d7d4 100644
--- a/scrapegraph-py/scrapegraph_py/utils/helpers.py
+++ b/scrapegraph-py/scrapegraph_py/utils/helpers.py
@@ -17,7 +17,7 @@ def validate_api_key(api_key: str) -> bool:
UUID(uuid_part)
except ValueError:
raise ValueError(
- "Invalid API key format. API key must be 'sgai-' followed by a valid UUID."
+ "Invalid API key format. API key must be 'sgai-' followed by a valid UUID. You can get one at https://dashboard.scrapegraphai.com/"
)
return True
diff --git a/scrapegraph-py/uv.lock b/scrapegraph-py/uv.lock
index 1990785..bb5cf94 100644
--- a/scrapegraph-py/uv.lock
+++ b/scrapegraph-py/uv.lock
@@ -557,11 +557,11 @@ wheels = [
[[package]]
name = "idna"
-version = "3.10"
+version = "3.9"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 }
+sdist = { url = "https://files.pythonhosted.org/packages/00/6f/93e724eafe34e860d15d37a4f72a1511dd37c43a76a8671b22a15029d545/idna-3.9.tar.gz", hash = "sha256:e5c5dafde284f26e9e0f28f6ea2d6400abd5ca099864a67f576f3981c6476124", size = 191636 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
+ { url = "https://files.pythonhosted.org/packages/6d/15/61933d1999bc5ad8cad612d67f02fa5b16a423076ea0816e39c2e797af12/idna-3.9-py3-none-any.whl", hash = "sha256:69297d5da0cc9281c77efffb4e730254dd45943f45bbfb461de5991713989b1e", size = 71671 },
]
[[package]]