lndj · agentfarmx · Mar 14, 2025
diff --git a/images.py b/images.py
@@ -7,15 +7,86 @@
 
 import re
 import os
+import requests
+from bs4 import BeautifulSoup
+import time
+import random
 
 PAGE_START = 1
 #default 30 pages
 PAGE_END = 30
 DIR_PATH = '/path/to/1024'
 
-#1024 has much urls and it is changing always
-URL = 'http://cl.comcl.org/'
-#URL = 'http://t66y.com/'
+# Function to find a working URL through search
+def find_working_url():
+    """Search Google for a reachable mirror of the site and return its base URL.
+
+    Each search term is tried in turn. A result link is a candidate when its
+    host name contains one of the known markers; the first candidate whose
+    root answers HTTP 200 is returned as ``scheme://host/``. Search and
+    network errors are reported or skipped (best effort).
+
+    Returns:
+        str: The mirror's base URL, or "http://t66y.com/" if none is found.
+    """
+    from urllib.parse import unquote, urlsplit
+
+    search_terms = ["草榴社区", "1024 cl", "t66y"]
+    search_url = "https://www.google.com/search"
+    # Markers are matched against the host name only: Google's own links echo
+    # the query text, so matching the whole href would accept google.com pages.
+    known_domains = ['t66y.com', 'cl.', '1024', 'caoliu']
+    # Use a custom user agent to avoid being blocked
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+
+    for term in search_terms:
+        try:
+            # Add a random delay to avoid being blocked
+            time.sleep(random.uniform(1, 3))
+            # params= URL-encodes the term; the timeout avoids an indefinite hang.
+            response = requests.get(search_url, params={'q': term}, headers=headers, timeout=10)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, 'html.parser')
+        except Exception as e:
+            print(f"Error searching with term '{term}': {str(e)}")
+            continue
+
+        for result in soup.find_all('a'):
+            href = result.get('href') or ''
+            # Google wraps results as /url?q=<percent-encoded target>&...
+            if '/url?q=' in href:
+                href = unquote(href.split('/url?q=', 1)[1].split('&', 1)[0])
+            try:
+                parts = urlsplit(href)
+                host = parts.hostname or ''
+                if parts.scheme not in ('http', 'https'):
+                    continue
+                if not any(domain in host for domain in known_domains):
+                    continue
+                # Probe the site root, since callers append paths to it.
+                base_url = f"{parts.scheme}://{parts.netloc}/"
+                probe = requests.get(base_url, headers=headers, timeout=5)
+                if probe.status_code == 200:
+                    return base_url
+            except (requests.RequestException, ValueError):
+                continue  # malformed or unreachable candidate: try the next
+
+    # If no URL is found, return one of the known URLs as fallback
+    return "http://t66y.com/"
+
+# Try to find a working URL, otherwise use the default
+try:
+    URL = find_working_url()
+    print("Using automatically found URL: %s" % (URL,))
+except Exception as exc:
+    # Discovery is best-effort: report why it failed and carry on.
+    print("Error finding URL automatically: %s" % (exc,))
+    # Pin a hard-coded address instead; 'http://t66y.com/' is the
+    # alternative that can be swapped in here.
+    URL = 'http://cl.comcl.org/'
+
 START_URL = URL + 'thread0806.php?fid=8'
 
 class Handler(BaseHandler):