diff --git a/.gitignore b/.gitignore index 9f61567..d8bf989 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ node_modules/ snapshot.jpg -.env \ No newline at end of file +.env +myenv/ \ No newline at end of file diff --git a/README.md b/README.md index 5dd47d8..f3019b6 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,18 @@ Wait for few seconds (adjust `const timeout = 6000;` if too slow), and `snapsh This next part is better than a lot of OCR software for common tasks- in my opinion. +Set up Python environment and install packages: +``` +python3 -m venv myenv +source myenv/bin/activate +``` + +``` +pip install -r requirements.txt +``` + + + Edit the following lines in `gpt4v_scraper.py`, replacing with your own website URL and then a system prompt (command to the GPT-4V API) about what to scrape for. See my example: ``` diff --git a/package-lock.json b/package-lock.json index 5434043..868a781 100644 --- a/package-lock.json +++ b/package-lock.json @@ -842,9 +842,9 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, "node_modules/ip": { - "version": "1.1.8", - "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.8.tgz", - "integrity": "sha512-PuExPYUiu6qMBQb4l06ecm6T6ujzhmh+MeJcW9wa89PoAz5pvd4zPgN5WJV104mb6S2T1AwNIAaB70JNrLQWhg==" + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.9.tgz", + "integrity": "sha512-cyRxvOEpNHNtchU3Ln9KC/auJgup87llfQpQ+t5ghoC/UhL16SWzbueiCsdTnWmqAWl7LadfuwhlqmtOaqMHdQ==" }, "node_modules/is-arrayish": { "version": "0.2.1", @@ -1537,9 +1537,9 @@ } }, "node_modules/socks/node_modules/ip": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ip/-/ip-2.0.0.tgz", - "integrity": "sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ==" + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ip/-/ip-2.0.1.tgz", + "integrity": "sha512-lJUL9imLTNi1ZfXT+DU6rBBdbiKGBuay9B6xGSPVjUeQwaH1RIGqef8RZkUtHioLmSNpPR5M4HVKJGm1j8FWVQ==" }, "node_modules/source-map": { "version": "0.6.1", diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2f0d4a2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +annotated-types==0.6.0 +anyio==4.2.0 +certifi==2023.11.17 +distro==1.9.0 +h11==0.14.0 +httpcore==1.0.2 +httpx==0.26.0 +idna==3.6 +openai==1.7.1 +pydantic==2.5.3 +pydantic_core==2.14.6 +python-dotenv==1.0.0 +sniffio==1.3.0 +tqdm==4.66.1 +typing_extensions==4.9.0 \ No newline at end of file