From eb17f882f1f5bd07ec7cba07cfe662693ed17174 Mon Sep 17 00:00:00 2001 From: hellothere012 Date: Wed, 4 Jun 2025 08:03:59 -0700 Subject: [PATCH 1/4] feat: add proxy support and cleanup --- .env.example | 7 + README.md | 25 +++ app.py | 123 ----------- app/__init__.py | 0 app/crud.py | 50 ----- app/database.py | 25 --- app/main.py | 265 ------------------------ app/models.py | 45 ---- app/schemas.py | 41 ---- app/scraper.py | 373 ---------------------------------- config.py | 34 ---- database.py | 26 --- render.yaml | 20 +- src/api/routes.py | 6 +- src/automation/browser_sim.py | 8 + src/config.py | 5 + 16 files changed, 64 insertions(+), 989 deletions(-) delete mode 100644 app.py delete mode 100644 app/__init__.py delete mode 100644 app/crud.py delete mode 100644 app/database.py delete mode 100644 app/main.py delete mode 100644 app/models.py delete mode 100644 app/schemas.py delete mode 100644 app/scraper.py delete mode 100644 config.py delete mode 100644 database.py diff --git a/.env.example b/.env.example index 767687c..4744016 100644 --- a/.env.example +++ b/.env.example @@ -13,3 +13,10 @@ API_PORT=8000 # Scraping Limits MAX_LISTINGS_PER_SESSION=25 + +# Optional Proxy Configuration +# If using rotating proxies (e.g., Webshare), uncomment and provide the proxy URL. +# Example: http://username:password@proxyhost:port +# PROXY_SERVER= +# PROXY_USERNAME= +# PROXY_PASSWORD= diff --git a/README.md b/README.md index f5d383b..7525e16 100644 --- a/README.md +++ b/README.md @@ -20,3 +20,28 @@ The scraper can also be run standalone: ```bash python main.py scrape_test ``` + +## Environment Variables + +Set the following variables in a `.env` file or your deployment environment: + +| Variable | Description | Default | +| --- | --- | --- | +| `DATABASE_URL` | Database connection URL | `sqlite+aiosqlite:///./vehicle_data.db` | +| `HEADLESS` | Run the browser in headless mode | `true` | +| `BROWSER_TIMEOUT` | Playwright launch timeout (ms) | `60000` | +| `PAGE_DELAY` | Base delay after page loads (ms) | `5000` | +| `MIN_DELAY_BETWEEN_ACTIONS` | Delay between scraping actions (s) | `2.5` | +| `API_HOST` | Host for the FastAPI server | `127.0.0.1` | +| `API_PORT` | Port for the FastAPI server | `8000` | +| `MAX_LISTINGS_PER_SESSION` | Maximum listings fetched per scrape | `25` | +| `PROXY_SERVER` | *(Optional)* Proxy URL for Playwright | - | +| `PROXY_USERNAME` | *(Optional)* Proxy username | - | +| `PROXY_PASSWORD` | *(Optional)* Proxy password | - | + +### Pagination + +The `/api/v1/vehicles/` endpoint accepts `skip` and `limit` query parameters to paginate results. +Example: `/api/v1/vehicles/?skip=25&limit=25`. + + diff --git a/app.py b/app.py deleted file mode 100644 index 8fa8500..0000000 --- a/app.py +++ /dev/null @@ -1,123 +0,0 @@ -import logging -# import os # No longer needed for getenv in background task -import asyncio -from fastapi import FastAPI, Depends, BackgroundTasks -from pydantic import BaseModel -from typing import Dict -from datetime import datetime -from database import CarListing, get_db, Session, SessionLocal -from scraper import scrape_autotrader_and_update_db -from fastapi.middleware.cors import CORSMiddleware -from config import AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT, LOG_LEVEL # Import from config - -# Configure basic logging using LOG_LEVEL from config -# Ensure this is called only once. If FastAPI/Uvicorn also configures logging, -# this might need adjustment or to be handled by the logger instance directly. -# For now, assume this is the primary logging config. 
-logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s', force=True) -# Added force=True to ensure this config takes precedence if uvicorn also tries to set basicConfig. - -app = FastAPI() -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_methods=["*"], - allow_headers=["*"], -) - -class CarListingRaw(BaseModel): - platform: str - extracted_at: datetime - source_url: str - data_points: Dict - -@app.post("/api/v1/listings/ingest") -async def ingest_listing(payload: CarListingRaw, db: Session = Depends(get_db)): - listing = CarListing( - platform=payload.platform, - extracted_at=payload.extracted_at, - source_url=payload.source_url, - data_points=payload.data_points - ) - db.add(listing) - db.commit() - db.refresh(listing) - return {"status": "saved", "listing_id": listing.id} - -@app.get("/") -def read_root(): - return {"message": "πŸš— Car Tracker API is running!"} - -# Global variable to store scraping status -scrape_status = { - "last_run_time": None, - "status": "idle", # States: idle, running, success, error - "message": "", - "added": 0, - "updated": 0, - "scraped_count": 0 -} - -# Background task wrapper -async def _background_scraper_task_wrapper(): - global scrape_status - db_task_session: Session = SessionLocal() - logging.info("Background scraper task started.") - scrape_status["status"] = "running" - scrape_status["message"] = "Scraping in progress..." - scrape_status["last_run_time"] = datetime.utcnow().isoformat() - scrape_status["added"] = 0 # Reset counts for current run - scrape_status["updated"] = 0 - scrape_status["scraped_count"] = 0 - - try: - # Use imported config values - # autotrader_url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") - # headless_str = os.getenv("HEADLESS_BROWSER", "True") - # headless = headless_str.lower() == "true" - # scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - # try: - # scrape_timeout = int(scrape_timeout_str) - # except ValueError: - # logging.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. Defaulting to 120000ms.") - # scrape_timeout = 120000 - - logging.info(f"Background task using URL: {AUTOTRADER_URL}, Headless: {HEADLESS_BROWSER}, Timeout: {SCRAPE_TIMEOUT}ms") - - result = await scrape_autotrader_and_update_db( - db=db_task_session, - autotrader_url=AUTOTRADER_URL, - headless=HEADLESS_BROWSER, - scrape_timeout=SCRAPE_TIMEOUT - ) - - if result.get("status") == "success": - scrape_status["status"] = "success" - scrape_status["message"] = "Scraping completed successfully." 
- scrape_status["added"] = result.get("added", 0) - scrape_status["updated"] = result.get("updated", 0) - scrape_status["scraped_count"] = result.get("scraped_count", 0) - else: - scrape_status["status"] = "error" - scrape_status["message"] = result.get("message", "Scraping failed with an unknown error.") - - logging.info(f"Background scraper task completed: {result}") - - except Exception as e: - logging.error(f"Error in background scraper task: {e}", exc_info=True) - scrape_status["status"] = "error" - scrape_status["message"] = str(e) - finally: - db_task_session.close() - logging.info("Background scraper DB session closed.") - -@app.post("/api/v1/scrape/autotrader") -async def trigger_autotrader_scrape(background_tasks: BackgroundTasks): - if scrape_status["status"] == "running": - return {"message": "AutoTrader scraping job is already running."} - background_tasks.add_task(_background_scraper_task_wrapper) - return {"message": "AutoTrader scraping job started in the background."} - -@app.get("/api/v1/scrape/status") -async def get_scrape_status(): - return scrape_status diff --git a/app/__init__.py b/app/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/crud.py b/app/crud.py deleted file mode 100644 index fb8708b..0000000 --- a/app/crud.py +++ /dev/null @@ -1,50 +0,0 @@ -from sqlalchemy.orm import Session -from . import models, schemas -from datetime import datetime - -def get_car_listing_by_url(db: Session, url: str): - return db.query(models.ScrapedData).filter(models.ScrapedData.url == url).first() - -def create_car_listing(db: Session, listing: schemas.CarListingCreate): - db_listing = models.ScrapedData( - job_id=listing.job_id, - platform=listing.platform, - url=str(listing.url), # Ensure HttpUrl is converted to string - title=listing.title, - price=listing.price, - mileage=listing.mileage, - vin=listing.vin, - image_urls=listing.image_urls, # Assuming image_urls is already a list of strings or compatible JSON - raw_data=listing.raw_data, - scraped_at=datetime.utcnow() - ) - db.add(db_listing) - db.commit() - db.refresh(db_listing) - return db_listing - -def create_scrape_job(db: Session) -> models.ScrapeJob: - db_job = models.ScrapeJob(timestamp=datetime.utcnow(), status="pending") - db.add(db_job) - db.commit() - db.refresh(db_job) - return db_job - -def update_scrape_job_status(db: Session, job_id: int, status: str, results_count: int = 0, error_message: str = None): - db_job = db.query(models.ScrapeJob).filter(models.ScrapeJob.id == job_id).first() - if db_job: - db_job.status = status - db_job.results_count = results_count - db_job.error_message = error_message - db.commit() - db.refresh(db_job) - return db_job - -def get_scrape_job(db: Session, job_id: int): - return db.query(models.ScrapeJob).filter(models.ScrapeJob.id == job_id).first() - -def get_all_scrape_jobs(db: Session, skip: int = 0, limit: int = 100): - return db.query(models.ScrapeJob).order_by(models.ScrapeJob.timestamp.desc()).offset(skip).limit(limit).all() - -def get_listings_for_job(db: Session, job_id: int, skip: int = 0, limit: int = 100): - return db.query(models.ScrapedData).filter(models.ScrapedData.job_id == job_id).offset(skip).limit(limit).all() diff --git a/app/database.py b/app/database.py deleted file mode 100644 index bf32154..0000000 --- a/app/database.py +++ /dev/null @@ -1,25 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker -import os - -DATABASE_URL = 
os.getenv("DATABASE_URL", "sqlite:///./data/vehicle_tracker.db") - -engine_args = {} -if DATABASE_URL.startswith("sqlite"): - engine_args["connect_args"] = {"check_same_thread": False} - -engine = create_engine(DATABASE_URL, **engine_args) -SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - -Base = declarative_base() - -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() - -def create_tables(): - Base.metadata.create_all(bind=engine) diff --git a/app/main.py b/app/main.py deleted file mode 100644 index 4817f15..0000000 --- a/app/main.py +++ /dev/null @@ -1,265 +0,0 @@ -import logging -import os -from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks -from sqlalchemy.orm import Session -from typing import List - -from . import crud, models, schemas, scraper -from .database import SessionLocal, engine - -# Create database tables if they don't exist -models.Base.metadata.create_all(bind=engine) - -# Configure logging -LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper() -logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -app = FastAPI(title="AutoTrader Scraper API", version="1.0.0") - -# Dependency to get DB session -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() - -# Global variable to store scraping status (simple approach) -scrape_status = { - "job_id": None, - "status": "idle", # States: idle, pending, running, completed, failed - "message": "No scraping job initiated yet.", - "last_run_time": None, - "duration_seconds": None, - "results_count": 0, - "error_message": None -} - -async def run_scraping_task(job_id: int, autotrader_url: str, headless: bool, scrape_timeout: int): - """ - The actual scraping task that runs in the background. - It creates its own database session. - """ - global scrape_status - db: Session = SessionLocal() - try: - logger.info(f"Background task started for job_id: {job_id}") - crud.update_scrape_job_status(db, job_id, status="running") - scrape_status.update({ - "job_id": job_id, - "status": "running", - "message": f"Scraping from {autotrader_url}...", - "last_run_time": datetime.utcnow().isoformat(), - "duration_seconds": None, - "results_count": 0, - "error_message": None - }) - - start_time = datetime.utcnow() - - scraped_data_list = await scraper.scrape_autotrader_data( - autotrader_url=autotrader_url, - headless=headless, - timeout=scrape_timeout - ) - - end_time = datetime.utcnow() - duration = (end_time - start_time).total_seconds() - scrape_status["duration_seconds"] = round(duration, 2) - - added_count = 0 - updated_count = 0 # Placeholder for future update logic - - if not scraped_data_list: - logger.info(f"No listings found for job_id: {job_id}") - crud.update_scrape_job_status(db, job_id, status="completed", results_count=0) - scrape_status.update({ - "status": "completed", - "message": "Scraping completed. 
No new listings found or page was inaccessible.", - "results_count": 0 - }) - return - - for item_data in scraped_data_list: - # Ensure all required fields for CarListingCreate are present - listing_create = schemas.CarListingCreate( - job_id=job_id, - platform=item_data.get("source_name", "autotrader"), # Get platform from scraper or default - url=item_data.get("listing_url"), - title=item_data.get("title"), - price=item_data.get("price"), - mileage=item_data.get("mileage"), - vin=item_data.get("vin"), - image_urls=item_data.get("image_urls", []), - raw_data=item_data.get("data_points", {}) - ) - - existing_listing = crud.get_car_listing_by_url(db, str(listing_create.url)) - if existing_listing: - # For now, we just count updates. Actual update logic could be added here. - # e.g., existing_listing.price = listing_create.price - # existing_listing.extracted_at = datetime.utcnow() - updated_count += 1 - else: - crud.create_car_listing(db=db, listing=listing_create) - added_count += 1 - - crud.update_scrape_job_status(db, job_id, status="completed", results_count=added_count) - scrape_status.update({ - "status": "completed", - "message": f"Scraping finished. Added: {added_count}, Updated: {updated_count} (placeholder).", - "results_count": added_count + updated_count # Or just added_count if updates aren't really changing data - }) - logger.info(f"Background task for job_id: {job_id} completed. Added: {added_count}, Updated: {updated_count}") - - except Exception as e: - logger.error(f"Error in background scraper task for job_id {job_id}: {e}", exc_info=True) - crud.update_scrape_job_status(db, job_id, status="failed", error_message=str(e)) - scrape_status.update({ - "status": "failed", - "message": f"Error during scraping: {str(e)}", - "error_message": str(e) - }) - finally: - db.close() - logger.info(f"DB session closed for job_id: {job_id}") - - -@app.post("/scrape/", response_model=schemas.ScrapeJob, status_code=202) -async def trigger_scrape(background_tasks: BackgroundTasks, db: Session = Depends(get_db)): - """ - Triggers a new scraping job for Autotrader. - """ - global scrape_status - if scrape_status.get("status") == "running": - raise HTTPException(status_code=409, detail="A scraping job is already in progress.") - - autotrader_url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/all-cars/cars-under-10000") # Default to a common search if not set - headless_str = os.getenv("HEADLESS_BROWSER", "True") - headless = headless_str.lower() == "true" - scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - - try: - scrape_timeout = int(scrape_timeout_str) - except ValueError: - scrape_timeout = 120000 # Default timeout if parsing fails - logger.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. 
Using default {scrape_timeout}ms.") - - job = crud.create_scrape_job(db) - scrape_status.update({ - "job_id": job.id, - "status": "pending", - "message": f"Scraping job {job.id} initiated for URL: {autotrader_url}", - "last_run_time": job.timestamp.isoformat(), - "duration_seconds": None, - "results_count": 0, - "error_message": None - }) - - # Pass job_id to the background task - background_tasks.add_task(run_scraping_task, job.id, autotrader_url, headless, scrape_timeout) - - logger.info(f"Scraping job {job.id} queued for URL: {autotrader_url}") - return job - -@app.post("/api/v1/listings/ingest", response_model=schemas.CarListing, status_code=201) -async def ingest_listing(payload: schemas.CarListingCreate, db: Session = Depends(get_db)): - """ - Ingests a new car listing into the database. - This endpoint is useful for manually adding or testing data. - """ - # Check if listing with this URL already exists to prevent duplicates, - # though the database constraint should also handle this. - db_listing = crud.get_car_listing_by_url(db, url=str(payload.url)) - if db_listing: - raise HTTPException(status_code=400, detail="Listing with this URL already exists.") - - # The job_id in CarListingCreate might be problematic if this is a direct ingest - # not tied to a specific scrape job. For now, we'll assume it's provided or - # we could adjust the schema/logic if direct ingestion shouldn't have a job_id. - # For testing, we might need to create a dummy job or adjust schema. - # Let's assume for now a valid job_id is provided or handle it if not. - if not payload.job_id: - # Create a dummy job or handle as per requirements for listings not tied to a job - # For simplicity, let's assume job_id is optional in the schema for this use case - # or a default/placeholder job_id is used. - # For this test, the payload includes job_id, so we'll proceed. - # If CarListingCreate schema requires job_id, this endpoint needs to handle it. - # For now, let's assume it's provided in the payload. - pass - - try: - created_listing = crud.create_car_listing(db=db, listing=payload) - return created_listing - except Exception as e: - logger.error(f"Error ingesting listing: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") - - -@app.get("/scrape/status", response_model=schemas.ScrapeJob) # Using ScrapeJob schema for better structure -async def get_current_scrape_status(db: Session = Depends(get_db)): - """ - Returns the status of the current or last scraping job. - """ - global scrape_status - if scrape_status.get("job_id"): - job = crud.get_scrape_job(db, scrape_status["job_id"]) - if job: - # Update status from DB if available, otherwise use in-memory for simplicity - # A more robust system might always fetch from DB or use a proper job queue status - return job - return scrape_status # Fallback to in-memory status if job not found or not started - -@app.get("/scrape/jobs/", response_model=List[schemas.ScrapeJob]) -async def read_jobs(skip: int = 0, limit: int = 10, db: Session = Depends(get_db)): - """ - Retrieve all scrape jobs. - """ - jobs = crud.get_all_scrape_jobs(db, skip=skip, limit=limit) - return jobs - -@app.get("/scrape/jobs/{job_id}/results", response_model=List[schemas.CarListing]) -async def read_job_results(job_id: int, skip: int = 0, limit: int = 10, db: Session = Depends(get_db)): - """ - Retrieve results for a specific scrape job. 
- """ - job = crud.get_scrape_job(db, job_id=job_id) - if job is None: - raise HTTPException(status_code=404, detail="Job not found") - listings = crud.get_listings_for_job(db, job_id=job_id, skip=skip, limit=limit) - return listings - -@app.get("/") -async def read_root(): - return {"message": "AutoTrader Scraper API is running!"} - -# This is for local development if you run `python app/main.py` -# Uvicorn will be started by Procfile in production environments like Heroku -if __name__ == "__main__": - # Ensure tables are created before starting the app if they don't exist - # This is useful for local development but might be handled differently in production - from .database import create_tables - create_tables() - - # Get port from environment variable or default to 8000 - port = int(os.getenv("PORT", "8000")) - uvicorn.run(app, host="0.0.0.0", port=port) - -# Remove the old main.py content if it exists in the root directory -# This is now handled by app/main.py -# Ensure Procfile points to app.main:app or similar based on your directory structure -# e.g., web: uvicorn app.main:app --host=0.0.0.0 --port=${PORT:-8000} -# (Assuming app.py is moved to app/main.py) -# If app.py remains in root, then Procfile is fine. - -# The `models.Base.metadata.create_all(bind=engine)` should ideally be called once, -# perhaps in main.py or a startup script, not every time database.py is imported. -# For simplicity in this single-file app structure, it's often put there. -# If app.py is the main entry point for uvicorn, it's a good place. -# For Render, buildCommand in render.yaml can also handle migrations/table creation. - -# Let's ensure the imports are correct considering the file structure -# If main.py is in root and imports from app/, it should be `from app import crud, models, schemas, scraper` -# If this file is app/main.py, then `from . import crud, models, schemas, scraper` is correct. -# The prompt implies this file is app/main.py. 
diff --git a/app/models.py b/app/models.py deleted file mode 100644 index b0d4e5d..0000000 --- a/app/models.py +++ /dev/null @@ -1,45 +0,0 @@ -from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship -from datetime import datetime - -Base = declarative_base() - -class ScrapeResult(Base): - __tablename__ = "scrape_results" - - id = Column(Integer, primary_key=True, index=True) - url = Column(String, index=True) - title = Column(String) - price = Column(String, nullable=True) # Store as string to handle variations like 'Contact Seller' - mileage = Column(String, nullable=True) # Store as string to handle non-numeric values - vin = Column(String, nullable=True, unique=True) - images = Column(JSON, nullable=True) # Store list of image URLs - scraped_at = Column(DateTime) - details = Column(JSON, nullable=True) # Store other details as JSON - -class ScrapeJob(Base): - __tablename__ = "scrape_jobs" - - id = Column(Integer, primary_key=True, index=True) - timestamp = Column(DateTime, default=datetime.utcnow) - status = Column(String, default="pending") # e.g., pending, running, completed, failed - results_count = Column(Integer, default=0) - error_message = Column(String, nullable=True) - -class ScrapedData(Base): - __tablename__ = "scraped_data" - - id = Column(Integer, primary_key=True, index=True) - job_id = Column(Integer, ForeignKey("scrape_jobs.id")) - platform = Column(String) # e.g., 'autotrader', 'cars.com' - url = Column(String, unique=True, index=True) - title = Column(String, nullable=True) - price = Column(String, nullable=True) - mileage = Column(String, nullable=True) - vin = Column(String, nullable=True, index=True) - image_urls = Column(JSON, nullable=True) # List of image URLs - raw_data = Column(JSON, nullable=True) # Full raw data if needed - scraped_at = Column(DateTime, default=datetime.utcnow) - - job = relationship("ScrapeJob") diff --git a/app/schemas.py b/app/schemas.py deleted file mode 100644 index 2ee0d8d..0000000 --- a/app/schemas.py +++ /dev/null @@ -1,41 +0,0 @@ -from pydantic import BaseModel, HttpUrl -from typing import List, Optional, Dict, Any -from datetime import datetime - -class CarListingBase(BaseModel): - url: HttpUrl - title: Optional[str] = None - price: Optional[str] = None # Keep as string to handle variations - mileage: Optional[str] = None # Keep as string - vin: Optional[str] = None - image_urls: Optional[List[HttpUrl]] = [] - raw_data: Optional[Dict[str, Any]] = {} # For any other unstructured data - -class CarListingCreate(CarListingBase): - platform: str - job_id: int - -class CarListing(CarListingBase): - id: int - platform: str - job_id: int - scraped_at: datetime - - class Config: - orm_mode = True - -class ScrapeJobBase(BaseModel): - pass - -class ScrapeJobCreate(ScrapeJobBase): - pass - -class ScrapeJob(ScrapeJobBase): - id: int - timestamp: datetime - status: str - results_count: int = 0 - error_message: Optional[str] = None - - class Config: - orm_mode = True diff --git a/app/scraper.py b/app/scraper.py deleted file mode 100644 index b946ecb..0000000 --- a/app/scraper.py +++ /dev/null @@ -1,373 +0,0 @@ -import asyncio -import logging -# import os # No longer needed for getenv in main -import datetime # Keep for now, might be used in data processing -from playwright.async_api import async_playwright -# Required for main test function -from config import AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT -# DATABASE_URL is used 
by database.py, SessionLocal will pick it up via config - -# Assuming database.py is in the same directory or accessible in PYTHONPATH -from database import get_db, CarListing, SessionLocal # Added SessionLocal for main example -from sqlalchemy.orm import Session -from datetime import datetime # Ensure datetime is imported directly - -# Configure basic logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -from stealth_utils import apply_stealth_js # Import new stealth utility -# from playwright_stealth import stealth_async # Commenting out old stealth - -class AutoTraderScraper: - """Scraper for AutoTrader private party listings using Playwright.""" - - def __init__(self, source_name: str = "autotrader"): - """ - Initializes the AutoTraderScraper. - Args: - source_name (str): Name of the source platform. - """ - self.source_name = source_name - # Potentially load other configs from a config file or env vars here - # For example: self.base_url = "https://www.autotrader.com/cars-for-sale/private-seller" - - async def get_private_listings(self, autotrader_url: str, headless: bool, timeout: int = 120000) -> list[dict]: - """ - Scrapes private party listings from AutoTrader using Playwright. - - Args: - autotrader_url (str): The starting URL for scraping AutoTrader private listings. - headless (bool): Whether to run the browser in headless mode. - timeout (int): Maximum time in milliseconds for page operations. - - Returns: - list[dict]: A list of dictionaries, where each dictionary represents a scraped vehicle listing. - """ - listings_data = [] - browser = None - - launch_options = { - "headless": headless, - "args": [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-infobars', - '--window-position=0,0', - '--ignore-certificate-errors', - '--ignore-certificate-errors-spki-list', - # '--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"' # User agent is set in context - '--disable-gpu' # Already there but keep - ], - # "channel": "chrome" # This might require full Chrome install, trying without first to see if args help - } - - # Try with 'msedge' or 'chrome' if default chromium fails and they are available - # For now, stick to chromium and args. If 'channel' is needed, it's a bigger setup change. - - async with async_playwright() as p: - try: - # browser = await p.chromium.launch(**launch_options) # Default chromium - # Let's try specifying channel, assuming it might use a locally installed Chrome if available, or a Playwright-managed one. - # This is a common suggestion if the default Playwright Chromium build is too easily detected. - # If "chrome" channel is not found by Playwright, it will error. - try: - browser = await p.chromium.launch( - **launch_options, - channel="chrome" # Attempt to use a branded Chrome build - ) - logging.info("Attempting to launch with channel='chrome'") - except Exception as e_channel: - logging.warning(f"Failed to launch with channel='chrome' ({e_channel}). 
Falling back to default Playwright Chromium.") - # Remove channel from launch_options if it failed - launch_options_no_channel = launch_options.copy() - if "channel" in launch_options_no_channel: # Should not be needed based on above structure but good practice - del launch_options_no_channel["channel"] - browser = await p.chromium.launch(**launch_options_no_channel) - logging.info("Launched with default Playwright Chromium.") - - - context = await browser.new_context( - user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36', # A fairly common user agent - java_script_enabled=True, - ) - context.set_default_navigation_timeout(timeout) - context.set_default_timeout(timeout) - - page = await context.new_page() - await page.set_viewport_size({"width": 1920, "height": 1080}) - - # Apply custom JS stealth - await apply_stealth_js(page) - - logging.info(f"Navigating to {autotrader_url}") - await page.goto(autotrader_url, wait_until="domcontentloaded", timeout=timeout) # Reverted to domcontentloaded - - title = await page.title() - logging.info(f"Page title: {title}") - - if "unavailable" in title.lower() or "block" in title.lower() or "access denied" in title.lower(): - logging.critical(f"Failed to load AutoTrader listings page. Blocked by website. Title: {title}") - await browser.close() # Ensure browser is closed before returning - return [] - - # Using speculative selectors for AutoTrader - # Main container for listings: 'div[data-qaid="cntnr-lstng-main"]' (this might be too broad or incorrect) - # A more specific item selector might be needed, e.g., an article or a div with a specific class. - # For now, let's assume individual listing cards can be found with a selector like: - # "div.inventory-listing" or "div[data-cmp='inventoryListing']" - these are common patterns. - # The provided example 'div[data-qaid="cntnr-lstng-main"]' seems like it might be a single container FOR ALL listings. - # Let's try a more specific (but still guessed) selector for individual listing items. - # A common pattern is items within a list or grid. Let's try to find items: - # This selector is a **GUESS** based on common AutoTrader structures. - listing_item_selector = "div[data-cmp='inventoryListing']" # GUESS - - # Fallback if the primary guess doesn't work, try another common pattern - # listing_item_selector_fallback = "div.inventory-listing.new-listing.stub" # Another GUESS - - # await page.wait_for_selector(listing_item_selector, timeout=15000) # Wait for items to appear - - listing_containers = await page.query_selector_all(listing_item_selector) - - # if not listing_containers: - # logging.info(f"No listings found with primary selector '{listing_item_selector}'. 
Trying fallback...") - # listing_containers = await page.query_selector_all(listing_item_selector_fallback) - - logging.info(f"Found {len(listing_containers)} potential listing containers using selector '{listing_item_selector}'.") - - processed_count = 0 - # first_container_processed_for_html_dump = False # REMOVE HTML DUMP FLAG - for i, container in enumerate(listing_containers): - url_path = None - title_text = "N/A" # Default to N/A - price_text = "N/A" # Default to N/A - mileage_text = "N/A" # Default to N/A (as it's not reliably on card) - listing_url = None - - try: - logging.debug(f"Processing container {i+1}/{len(listing_containers)}") - - # Attempt to get Title - title_el = await container.query_selector("h2[data-cmp='subheading']") # Updated selector from HTML dump - if title_el: - raw_title_text = await title_el.inner_text() - title_text = raw_title_text.strip() if raw_title_text else "N/A" - - # Attempt to get URL from parent of title_el - # Playwright's query_selector does not directly support xpath like "ancestor::a". - # A common structure is
<a data-cmp="link"><h2 data-cmp="subheading">...</h2></a> or <a data-cmp="link"><div>...<h2>...</h2>...</div></a>
- # We can try to find 'a' that contains this h2, or assume the 'a[data-cmp="link"]' is the one. - - # Let's use the a[data-cmp="link"] which was identified as containing the title h2 - parent_link_el = await container.query_selector("a[data-cmp='link']") - if parent_link_el: - url_path = await parent_link_el.get_attribute("href") - else: # Fallback if the above structure isn't found - logging.warning(f"Could not find parent a[data-cmp='link'] for title in listing {i+1}") - else: - logging.warning(f"Title not found with h2[data-cmp='subheading'] for listing {i+1}.") - - # Fallback or alternative for URL if not found via title's parent link - if not url_path: - url_el_alt = await container.query_selector("a[data-cmp='relLnk']") # Keep this fallback - if url_el_alt: - url_path = await url_el_alt.get_attribute("href") - - if not url_path: # Last resort for URL - first_a = await container.query_selector("a[href]") # Broadest fallback - if first_a: - url_path = await first_a.get_attribute("href") - - if not url_path: - logging.warning(f"Could not extract URL for listing {i+1} (Title: {title_text}). Skipping.") - continue - - if not url_path.startswith(('http://', 'https://')): - listing_url = f"https://www.autotrader.com{url_path}" - else: - listing_url = url_path - - # Attempt to get Price - price_el = await container.query_selector("div[data-cmp='firstPrice']") # Updated selector - if price_el: - raw_price_text = await price_el.inner_text() - price_text = raw_price_text.replace('$', '').replace(',', '').strip() if raw_price_text else "N/A" - else: - # Fallback for price (e.g. .first-price class directly) - price_el_fallback = await container.query_selector(".first-price") - if price_el_fallback: - raw_price_text = await price_el_fallback.inner_text() - price_text = raw_price_text.replace('$', '').replace(',', '').strip() if raw_price_text else "N/A" - else: - logging.warning(f"Price not found for listing {listing_url}") - price_text = "N/A" - - - # Mileage - Set to N/A as it's not reliably on the card from previous findings - mileage_text = "N/A" - # logging.info(f"Mileage not scraped from listing card for {listing_url} (by design for now).") - - vin_text = None - - listing_data = { - "listing_url": listing_url, - "title": title_text, # Already defaults to N/A or has value - "price": price_text, # Already defaults to N/A or has value - "mileage": mileage_text, # Is N/A - "vin": vin_text, - "source_name": self.source_name, - "data_points": { - "page_title_at_scrape": title # page's title, not listing's - } - } - listings_data.append(listing_data) - processed_count += 1 - logging.info(f"Successfully processed listing: {title_text[:50]}... URL: {listing_url}") - - except Exception as e: - logging.error(f"Error processing listing container {i+1} for URL {listing_url if listing_url else 'Unknown'}: {e}", exc_info=True) - continue - - logging.info(f"Successfully processed {processed_count} out of {len(listing_containers)} listing containers.") - - except Exception as e: - logging.error(f"An error occurred during Playwright scraping phase: {e}", exc_info=True) - finally: - if browser: - logging.info("Closing browser.") - await browser.close() - - return listings_data - - -async def scrape_autotrader_data(autotrader_url: str, headless: bool = True, timeout: int = 120000) -> list[dict]: - """ - High-level function to scrape data from AutoTrader. - Initializes the scraper and calls its scraping method. - - Args: - autotrader_url (str): The URL to scrape. 
- headless (bool): Whether to run the browser in headless mode. - timeout (int): Timeout for scraping operations in milliseconds. - - Returns: - list[dict]: A list of scraped listing data. - """ - scraper = AutoTraderScraper() - listings = await scraper.get_private_listings(autotrader_url=autotrader_url, headless=headless, timeout=timeout) - return listings - - -async def scrape_autotrader_and_update_db(db: Session, autotrader_url: str, headless: bool, scrape_timeout: int): - """ - Scrapes listings from AutoTrader and updates the database. - - Args: - db (Session): The SQLAlchemy database session. - autotrader_url (str): The URL to scrape. - headless (bool): Whether to run the browser in headless mode. - scrape_timeout (int): Timeout for scraping operations in milliseconds. - - Returns: - dict: A status dictionary with counts of added, updated, and scraped listings. - """ - logging.info(f"Starting scrape and update for URL: {autotrader_url}") - - try: - listings_data = await scrape_autotrader_data( - autotrader_url=autotrader_url, - headless=headless, - timeout=scrape_timeout - ) - except Exception as e: - logging.error(f"Failed to scrape data from {autotrader_url}: {e}", exc_info=True) - return {"status": "error", "message": f"Scraping failed: {e}"} - - added_count = 0 - updated_count = 0 - scraped_count = len(listings_data) - - for listing_data in listings_data: - source_url = listing_data.get('listing_url') # Renamed from 'url' to 'listing_url' in dummy data - if not source_url: - logging.warning(f"Scraped item missing 'listing_url': {listing_data.get('title')}. Skipping.") - continue - - try: - existing_listing = db.query(CarListing).filter(CarListing.source_url == source_url).first() - - if existing_listing: - # Placeholder for update logic - # existing_listing.extracted_at = datetime.utcnow() - # existing_listing.data_points = {k: v for k, v in listing_data.items() if k != 'listing_url'} - # # Update other fields like price if necessary - # db.add(existing_listing) # Not strictly necessary if only mutable fields changed and session tracks - updated_count += 1 - logging.info(f"Listing at {source_url} already exists. 
Marked for update (placeholder).") - else: - new_listing = CarListing( - platform="autotrader", - extracted_at=datetime.utcnow(), - source_url=source_url, - # Ensure data_points stores everything else from listing_data - data_points={k: v for k, v in listing_data.items() if k != 'listing_url'} - ) - db.add(new_listing) - added_count += 1 - logging.info(f"New listing added from {source_url}") - except Exception as e: - logging.error(f"Error processing listing {source_url} for DB: {e}", exc_info=True) - # Decide if you want to rollback here or continue with other listings - - try: - db.commit() - logging.info("Database changes committed.") - except Exception as e: - logging.error(f"Database commit failed: {e}", exc_info=True) - db.rollback() - return {"status": "error", "message": f"DB commit failed: {e}", "added": 0, "updated": 0, "scraped_count": scraped_count} - - status_summary = { - "status": "success", - "added": added_count, - "updated": updated_count, - "scraped_count": scraped_count - } - logging.info(f"DB update summary: {status_summary}") - return status_summary - -async def main(): - # Use settings from config.py - # url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") - # headless_str = os.getenv("HEADLESS_BROWSER", "True") - # headless = headless_str.lower() == "true" - # scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - # try: - # scrape_timeout = int(scrape_timeout_str) - # except ValueError: - # logging.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. Defaulting to 120000ms.") - # scrape_timeout = 120000 - - # from database import SessionLocal # Already imported at the top - db: Session = SessionLocal() # SessionLocal now uses DATABASE_URL from config.py via database.py - try: - logging.info(f"Starting scraper and DB update for URL: {AUTOTRADER_URL}, Headless: {HEADLESS_BROWSER}, Timeout: {SCRAPE_TIMEOUT}ms") - stats = await scrape_autotrader_and_update_db( - db=db, - autotrader_url=AUTOTRADER_URL, - headless=HEADLESS_BROWSER, - scrape_timeout=SCRAPE_TIMEOUT - ) - logging.info(f"Scraping and DB update completed: {stats}") - except Exception as e: - logging.error(f"Error during scraping and DB update in main: {e}", exc_info=True) - finally: - logging.info("Closing DB session in main.") - db.close() - -if __name__ == "__main__": - # To run this: - # 1. Ensure Playwright browsers are installed: `playwright install chromium` - # 2. Set environment variables if needed (AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT) - # 3. Uncomment the line below - asyncio.run(main()) - # pass # Keep it passive for now, to be run manually when needed diff --git a/config.py b/config.py deleted file mode 100644 index 44148ca..0000000 --- a/config.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from dotenv import load_dotenv - -# Load environment variables from .env file if it exists -# This is useful for local development. 
-load_dotenv() - -# Database Configuration -DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite:///./data/vehicle_tracker.db") - -# Scraper Configuration -AUTOTRADER_URL: str = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") -SCRAPE_TIMEOUT: int = int(os.getenv("SCRAPE_TIMEOUT", "120000")) # Milliseconds -HEADLESS_BROWSER: bool = os.getenv("HEADLESS_BROWSER", "True").lower() == "true" - -# API Configuration (if any specific ones are needed later) -# Example: API_HOST: str = os.getenv("API_HOST", "0.0.0.0") -# Example: API_PORT: int = int(os.getenv("API_PORT", "8000")) - -# Logging Configuration (can also be added here if more complex) -LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").upper() - -# Ensure critical URLs have a scheme for robustness -if not AUTOTRADER_URL.startswith(("http://", "https://")): - # This print statement is for immediate feedback during startup/import. - # In a pure library, side effects on import are sometimes discouraged, - # but for an application's main config, it's often acceptable. - print(f"Warning: AUTOTRADER_URL ('{AUTOTRADER_URL}') did not have a scheme, prepended https://.") - AUTOTRADER_URL = "https://" + AUTOTRADER_URL - print(f"Corrected AUTOTRADER_URL: {AUTOTRADER_URL}") - - -# Example of how to handle SQLite connect_args based on config -DB_CONNECT_ARGS: dict = {"check_same_thread": False} if DATABASE_URL.startswith("sqlite") else {} diff --git a/database.py b/database.py deleted file mode 100644 index 58a11dc..0000000 --- a/database.py +++ /dev/null @@ -1,26 +0,0 @@ -from sqlalchemy import Column, Integer, String, DateTime, JSON, create_engine -from sqlalchemy.orm import declarative_base, sessionmaker, Session -from config import DATABASE_URL, DB_CONNECT_ARGS # Import from config - -# Use imported configuration -engine = create_engine(DATABASE_URL, connect_args=DB_CONNECT_ARGS) - -SessionLocal = sessionmaker(bind=engine, autoflush=False) -Base = declarative_base() - -class CarListing(Base): - __tablename__ = "listings" - id = Column(Integer, primary_key=True, index=True) - platform = Column(String) - extracted_at = Column(DateTime) - source_url = Column(String, unique=True) - data_points = Column(JSON) - -Base.metadata.create_all(bind=engine) - -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() diff --git a/render.yaml b/render.yaml index b3c7392..f755e35 100644 --- a/render.yaml +++ b/render.yaml @@ -11,9 +11,19 @@ services: value: 3.11 - key: DATABASE_URL generateValue: true - - key: AUTOTRADER_URL - value: "https://www.autotrader.com/cars-for-sale/private-seller" - - key: SCRAPE_TIMEOUT - value: 120000 - - key: HEADLESS_BROWSER + - key: HEADLESS value: "True" + - key: BROWSER_TIMEOUT + value: "60000" + - key: PAGE_DELAY + value: "5000" + - key: MIN_DELAY_BETWEEN_ACTIONS + value: "2.5" + - key: MAX_LISTINGS_PER_SESSION + value: "25" + - key: PROXY_SERVER + value: "" + - key: PROXY_USERNAME + value: "" + - key: PROXY_PASSWORD + value: "" diff --git a/src/api/routes.py b/src/api/routes.py index c1f5764..612fb3f 100644 --- a/src/api/routes.py +++ b/src/api/routes.py @@ -121,8 +121,10 @@ async def api_v1_root_info(): @router.get("/vehicles/", response_model=List[VehicleListingResponse]) async def get_all_vehicles( + skip: int = Query(0, ge=0), + limit: int = Query(settings.MAX_LISTINGS_PER_SESSION, ge=1, le=200), db: AsyncSession = Depends(get_db), - filters: SearchFilters = Depends() + filters: SearchFilters = Depends(), ): query = select(VehicleListing) conditions = [] @@ -151,7 
+153,7 @@ async def get_all_vehicles( if conditions: query = query.where(and_(*conditions)) query = query.order_by(VehicleListing.last_scraped_at.desc(), VehicleListing.created_at.desc()) - result = await db.execute(query) + result = await db.execute(query.offset(skip).limit(limit)) vehicles = result.scalars().all() response_vehicles = [] for vehicle_db_item in vehicles: diff --git a/src/automation/browser_sim.py b/src/automation/browser_sim.py index 96ed36f..c93f204 100644 --- a/src/automation/browser_sim.py +++ b/src/automation/browser_sim.py @@ -25,8 +25,16 @@ async def __aenter__(self): logger.info("Initializing AutoTrader Scraper...") self.playwright_instance = await async_playwright().start() try: + proxy_cfg = None + if settings.PROXY_SERVER: + proxy_cfg = {"server": settings.PROXY_SERVER} + if settings.PROXY_USERNAME and settings.PROXY_PASSWORD: + proxy_cfg["username"] = settings.PROXY_USERNAME + proxy_cfg["password"] = settings.PROXY_PASSWORD + self.browser = await self.playwright_instance.chromium.launch( headless=settings.HEADLESS, + proxy=proxy_cfg, args=[ '--no-sandbox', '--disable-setuid-sandbox', diff --git a/src/config.py b/src/config.py index 598bc2e..94a9b77 100644 --- a/src/config.py +++ b/src/config.py @@ -13,4 +13,9 @@ class Settings: API_PORT: int = int(os.getenv("API_PORT", "8000")) MAX_LISTINGS_PER_SESSION: int = int(os.getenv("MAX_LISTINGS_PER_SESSION", "25")) + # Proxy configuration + PROXY_SERVER: str | None = os.getenv("PROXY_SERVER") + PROXY_USERNAME: str | None = os.getenv("PROXY_USERNAME") + PROXY_PASSWORD: str | None = os.getenv("PROXY_PASSWORD") + settings = Settings() From bdd4820e71f0effb98c57a75fd3cef963aac810e Mon Sep 17 00:00:00 2001 From: hellothere012 Date: Wed, 4 Jun 2025 08:04:42 -0700 Subject: [PATCH 2/4] feat: add proxy support and cleanup --- .env.example | 37 +-- Procfile | 2 +- README.md | 48 +++- app.py | 123 ---------- app/crud.py | 50 ---- app/database.py | 25 -- app/main.py | 265 --------------------- app/models.py | 45 ---- app/schemas.py | 41 ---- app/scraper.py | 373 ------------------------------ config.py | 34 --- database.py | 26 --- main.py | 93 ++++++++ render.yaml | 26 ++- requirements.txt | 15 +- {app => src}/__init__.py | 0 src/api/__init__.py | 0 src/api/routes.py | 237 +++++++++++++++++++ src/automation/__init__.py | 0 src/automation/browser_sim.py | 420 ++++++++++++++++++++++++++++++++++ src/config.py | 21 ++ src/database.py | 28 +++ src/models/__init__.py | 0 src/models/vehicle.py | 84 +++++++ 24 files changed, 975 insertions(+), 1018 deletions(-) delete mode 100644 app.py delete mode 100644 app/crud.py delete mode 100644 app/database.py delete mode 100644 app/main.py delete mode 100644 app/models.py delete mode 100644 app/schemas.py delete mode 100644 app/scraper.py delete mode 100644 config.py delete mode 100644 database.py create mode 100644 main.py rename {app => src}/__init__.py (100%) create mode 100644 src/api/__init__.py create mode 100644 src/api/routes.py create mode 100644 src/automation/__init__.py create mode 100644 src/automation/browser_sim.py create mode 100644 src/config.py create mode 100644 src/database.py create mode 100644 src/models/__init__.py create mode 100644 src/models/vehicle.py diff --git a/.env.example b/.env.example index f7b9b12..4744016 100644 --- a/.env.example +++ b/.env.example @@ -1,21 +1,22 @@ -# Autotrader Configuration -AUTOTRADER_URL="https://www.autotrader.com/cars-for-sale/by-owner/fullerton-ca?zip=92833&searchRadius=50&numRecords=100&sortBy=priceDESC" +# Database Configuration 
+DATABASE_URL=sqlite+aiosqlite:///./vehicle_data.db -# Webshare Proxy Configuration -PROXY_HOST="your_webshare_proxy_host" -PROXY_PORT="your_webshare_proxy_port" -WEBSHARE_USERNAME="your_webshare_username" -WEBSHARE_PASSWORD="your_webshare_password" +# Browser Configuration +HEADLESS=true +BROWSER_TIMEOUT=60000 +PAGE_DELAY=5000 +MIN_DELAY_BETWEEN_ACTIONS=2.5 -# Database Configuration -# For local SQLite (default): -DATABASE_URL="sqlite+aiosqlite:///./data/vehicle_tracker.db" -DATABASE_TYPE="sqlite" -# Example for PostgreSQL: -# DATABASE_URL="postgresql+asyncpg://user:password@host:port/dbname" -# DATABASE_TYPE="postgresql" +# API Configuration +API_HOST=127.0.0.1 +API_PORT=8000 + +# Scraping Limits +MAX_LISTINGS_PER_SESSION=25 -# Application Configuration -LOG_LEVEL="INFO" -HEADLESS_BROWSER="True" # For Playwright -SCRAPE_TIMEOUT="120000" # For Playwright page/navigation timeout (milliseconds) +# Optional Proxy Configuration +# If using rotating proxies (e.g., Webshare), uncomment and provide the proxy URL. +# Example: http://username:password@proxyhost:port +# PROXY_SERVER= +# PROXY_USERNAME= +# PROXY_PASSWORD= diff --git a/Procfile b/Procfile index 3972b54..84b6dde 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: uvicorn app.main:app --host=0.0.0.0 --port=${PORT:-8000} +web: uvicorn main:app --host=0.0.0.0 --port=${PORT:-8000} diff --git a/README.md b/README.md index 894b33c..7525e16 100644 --- a/README.md +++ b/README.md @@ -1 +1,47 @@ -# vehicle-tracker \ No newline at end of file +# Vehicle Tracker + +This project provides a FastAPI-based API and web scraper for collecting and storing vehicle listings from sites like AutoTrader. It uses Playwright for scraping and SQLAlchemy with SQLite for storage. + +## Usage + +1. Install dependencies: + ```bash + pip install -r requirements.txt + playwright install chromium + ``` +2. Copy `.env.example` to `.env` and adjust settings as needed. +3. Run the API: + ```bash + uvicorn main:app --reload + ``` +4. Trigger scraping via the `/api/v1/vehicles/scrape` endpoint. + +The scraper can also be run standalone: +```bash +python main.py scrape_test +``` + +## Environment Variables + +Set the following variables in a `.env` file or your deployment environment: + +| Variable | Description | Default | +| --- | --- | --- | +| `DATABASE_URL` | Database connection URL | `sqlite+aiosqlite:///./vehicle_data.db` | +| `HEADLESS` | Run the browser in headless mode | `true` | +| `BROWSER_TIMEOUT` | Playwright launch timeout (ms) | `60000` | +| `PAGE_DELAY` | Base delay after page loads (ms) | `5000` | +| `MIN_DELAY_BETWEEN_ACTIONS` | Delay between scraping actions (s) | `2.5` | +| `API_HOST` | Host for the FastAPI server | `127.0.0.1` | +| `API_PORT` | Port for the FastAPI server | `8000` | +| `MAX_LISTINGS_PER_SESSION` | Maximum listings fetched per scrape | `25` | +| `PROXY_SERVER` | *(Optional)* Proxy URL for Playwright | - | +| `PROXY_USERNAME` | *(Optional)* Proxy username | - | +| `PROXY_PASSWORD` | *(Optional)* Proxy password | - | + +### Pagination + +The `/api/v1/vehicles/` endpoint accepts `skip` and `limit` query parameters to paginate results. +Example: `/api/v1/vehicles/?skip=25&limit=25`. 
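+
+For example, assuming the API is running locally on the default host and port, the second page of 25 results can be fetched with:
+
+```bash
+curl "http://127.0.0.1:8000/api/v1/vehicles/?skip=25&limit=25"
+```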
+ + diff --git a/app.py b/app.py deleted file mode 100644 index 8fa8500..0000000 --- a/app.py +++ /dev/null @@ -1,123 +0,0 @@ -import logging -# import os # No longer needed for getenv in background task -import asyncio -from fastapi import FastAPI, Depends, BackgroundTasks -from pydantic import BaseModel -from typing import Dict -from datetime import datetime -from database import CarListing, get_db, Session, SessionLocal -from scraper import scrape_autotrader_and_update_db -from fastapi.middleware.cors import CORSMiddleware -from config import AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT, LOG_LEVEL # Import from config - -# Configure basic logging using LOG_LEVEL from config -# Ensure this is called only once. If FastAPI/Uvicorn also configures logging, -# this might need adjustment or to be handled by the logger instance directly. -# For now, assume this is the primary logging config. -logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s', force=True) -# Added force=True to ensure this config takes precedence if uvicorn also tries to set basicConfig. - -app = FastAPI() -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_methods=["*"], - allow_headers=["*"], -) - -class CarListingRaw(BaseModel): - platform: str - extracted_at: datetime - source_url: str - data_points: Dict - -@app.post("/api/v1/listings/ingest") -async def ingest_listing(payload: CarListingRaw, db: Session = Depends(get_db)): - listing = CarListing( - platform=payload.platform, - extracted_at=payload.extracted_at, - source_url=payload.source_url, - data_points=payload.data_points - ) - db.add(listing) - db.commit() - db.refresh(listing) - return {"status": "saved", "listing_id": listing.id} - -@app.get("/") -def read_root(): - return {"message": "πŸš— Car Tracker API is running!"} - -# Global variable to store scraping status -scrape_status = { - "last_run_time": None, - "status": "idle", # States: idle, running, success, error - "message": "", - "added": 0, - "updated": 0, - "scraped_count": 0 -} - -# Background task wrapper -async def _background_scraper_task_wrapper(): - global scrape_status - db_task_session: Session = SessionLocal() - logging.info("Background scraper task started.") - scrape_status["status"] = "running" - scrape_status["message"] = "Scraping in progress..." - scrape_status["last_run_time"] = datetime.utcnow().isoformat() - scrape_status["added"] = 0 # Reset counts for current run - scrape_status["updated"] = 0 - scrape_status["scraped_count"] = 0 - - try: - # Use imported config values - # autotrader_url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") - # headless_str = os.getenv("HEADLESS_BROWSER", "True") - # headless = headless_str.lower() == "true" - # scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - # try: - # scrape_timeout = int(scrape_timeout_str) - # except ValueError: - # logging.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. Defaulting to 120000ms.") - # scrape_timeout = 120000 - - logging.info(f"Background task using URL: {AUTOTRADER_URL}, Headless: {HEADLESS_BROWSER}, Timeout: {SCRAPE_TIMEOUT}ms") - - result = await scrape_autotrader_and_update_db( - db=db_task_session, - autotrader_url=AUTOTRADER_URL, - headless=HEADLESS_BROWSER, - scrape_timeout=SCRAPE_TIMEOUT - ) - - if result.get("status") == "success": - scrape_status["status"] = "success" - scrape_status["message"] = "Scraping completed successfully." 
- scrape_status["added"] = result.get("added", 0) - scrape_status["updated"] = result.get("updated", 0) - scrape_status["scraped_count"] = result.get("scraped_count", 0) - else: - scrape_status["status"] = "error" - scrape_status["message"] = result.get("message", "Scraping failed with an unknown error.") - - logging.info(f"Background scraper task completed: {result}") - - except Exception as e: - logging.error(f"Error in background scraper task: {e}", exc_info=True) - scrape_status["status"] = "error" - scrape_status["message"] = str(e) - finally: - db_task_session.close() - logging.info("Background scraper DB session closed.") - -@app.post("/api/v1/scrape/autotrader") -async def trigger_autotrader_scrape(background_tasks: BackgroundTasks): - if scrape_status["status"] == "running": - return {"message": "AutoTrader scraping job is already running."} - background_tasks.add_task(_background_scraper_task_wrapper) - return {"message": "AutoTrader scraping job started in the background."} - -@app.get("/api/v1/scrape/status") -async def get_scrape_status(): - return scrape_status diff --git a/app/crud.py b/app/crud.py deleted file mode 100644 index fb8708b..0000000 --- a/app/crud.py +++ /dev/null @@ -1,50 +0,0 @@ -from sqlalchemy.orm import Session -from . import models, schemas -from datetime import datetime - -def get_car_listing_by_url(db: Session, url: str): - return db.query(models.ScrapedData).filter(models.ScrapedData.url == url).first() - -def create_car_listing(db: Session, listing: schemas.CarListingCreate): - db_listing = models.ScrapedData( - job_id=listing.job_id, - platform=listing.platform, - url=str(listing.url), # Ensure HttpUrl is converted to string - title=listing.title, - price=listing.price, - mileage=listing.mileage, - vin=listing.vin, - image_urls=listing.image_urls, # Assuming image_urls is already a list of strings or compatible JSON - raw_data=listing.raw_data, - scraped_at=datetime.utcnow() - ) - db.add(db_listing) - db.commit() - db.refresh(db_listing) - return db_listing - -def create_scrape_job(db: Session) -> models.ScrapeJob: - db_job = models.ScrapeJob(timestamp=datetime.utcnow(), status="pending") - db.add(db_job) - db.commit() - db.refresh(db_job) - return db_job - -def update_scrape_job_status(db: Session, job_id: int, status: str, results_count: int = 0, error_message: str = None): - db_job = db.query(models.ScrapeJob).filter(models.ScrapeJob.id == job_id).first() - if db_job: - db_job.status = status - db_job.results_count = results_count - db_job.error_message = error_message - db.commit() - db.refresh(db_job) - return db_job - -def get_scrape_job(db: Session, job_id: int): - return db.query(models.ScrapeJob).filter(models.ScrapeJob.id == job_id).first() - -def get_all_scrape_jobs(db: Session, skip: int = 0, limit: int = 100): - return db.query(models.ScrapeJob).order_by(models.ScrapeJob.timestamp.desc()).offset(skip).limit(limit).all() - -def get_listings_for_job(db: Session, job_id: int, skip: int = 0, limit: int = 100): - return db.query(models.ScrapedData).filter(models.ScrapedData.job_id == job_id).offset(skip).limit(limit).all() diff --git a/app/database.py b/app/database.py deleted file mode 100644 index bf32154..0000000 --- a/app/database.py +++ /dev/null @@ -1,25 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker -import os - -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/vehicle_tracker.db") - -engine_args = {} -if 
DATABASE_URL.startswith("sqlite"): - engine_args["connect_args"] = {"check_same_thread": False} - -engine = create_engine(DATABASE_URL, **engine_args) -SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - -Base = declarative_base() - -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() - -def create_tables(): - Base.metadata.create_all(bind=engine) diff --git a/app/main.py b/app/main.py deleted file mode 100644 index 4817f15..0000000 --- a/app/main.py +++ /dev/null @@ -1,265 +0,0 @@ -import logging -import os -from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks -from sqlalchemy.orm import Session -from typing import List - -from . import crud, models, schemas, scraper -from .database import SessionLocal, engine - -# Create database tables if they don't exist -models.Base.metadata.create_all(bind=engine) - -# Configure logging -LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper() -logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -app = FastAPI(title="AutoTrader Scraper API", version="1.0.0") - -# Dependency to get DB session -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() - -# Global variable to store scraping status (simple approach) -scrape_status = { - "job_id": None, - "status": "idle", # States: idle, pending, running, completed, failed - "message": "No scraping job initiated yet.", - "last_run_time": None, - "duration_seconds": None, - "results_count": 0, - "error_message": None -} - -async def run_scraping_task(job_id: int, autotrader_url: str, headless: bool, scrape_timeout: int): - """ - The actual scraping task that runs in the background. - It creates its own database session. - """ - global scrape_status - db: Session = SessionLocal() - try: - logger.info(f"Background task started for job_id: {job_id}") - crud.update_scrape_job_status(db, job_id, status="running") - scrape_status.update({ - "job_id": job_id, - "status": "running", - "message": f"Scraping from {autotrader_url}...", - "last_run_time": datetime.utcnow().isoformat(), - "duration_seconds": None, - "results_count": 0, - "error_message": None - }) - - start_time = datetime.utcnow() - - scraped_data_list = await scraper.scrape_autotrader_data( - autotrader_url=autotrader_url, - headless=headless, - timeout=scrape_timeout - ) - - end_time = datetime.utcnow() - duration = (end_time - start_time).total_seconds() - scrape_status["duration_seconds"] = round(duration, 2) - - added_count = 0 - updated_count = 0 # Placeholder for future update logic - - if not scraped_data_list: - logger.info(f"No listings found for job_id: {job_id}") - crud.update_scrape_job_status(db, job_id, status="completed", results_count=0) - scrape_status.update({ - "status": "completed", - "message": "Scraping completed. 
No new listings found or page was inaccessible.", - "results_count": 0 - }) - return - - for item_data in scraped_data_list: - # Ensure all required fields for CarListingCreate are present - listing_create = schemas.CarListingCreate( - job_id=job_id, - platform=item_data.get("source_name", "autotrader"), # Get platform from scraper or default - url=item_data.get("listing_url"), - title=item_data.get("title"), - price=item_data.get("price"), - mileage=item_data.get("mileage"), - vin=item_data.get("vin"), - image_urls=item_data.get("image_urls", []), - raw_data=item_data.get("data_points", {}) - ) - - existing_listing = crud.get_car_listing_by_url(db, str(listing_create.url)) - if existing_listing: - # For now, we just count updates. Actual update logic could be added here. - # e.g., existing_listing.price = listing_create.price - # existing_listing.extracted_at = datetime.utcnow() - updated_count += 1 - else: - crud.create_car_listing(db=db, listing=listing_create) - added_count += 1 - - crud.update_scrape_job_status(db, job_id, status="completed", results_count=added_count) - scrape_status.update({ - "status": "completed", - "message": f"Scraping finished. Added: {added_count}, Updated: {updated_count} (placeholder).", - "results_count": added_count + updated_count # Or just added_count if updates aren't really changing data - }) - logger.info(f"Background task for job_id: {job_id} completed. Added: {added_count}, Updated: {updated_count}") - - except Exception as e: - logger.error(f"Error in background scraper task for job_id {job_id}: {e}", exc_info=True) - crud.update_scrape_job_status(db, job_id, status="failed", error_message=str(e)) - scrape_status.update({ - "status": "failed", - "message": f"Error during scraping: {str(e)}", - "error_message": str(e) - }) - finally: - db.close() - logger.info(f"DB session closed for job_id: {job_id}") - - -@app.post("/scrape/", response_model=schemas.ScrapeJob, status_code=202) -async def trigger_scrape(background_tasks: BackgroundTasks, db: Session = Depends(get_db)): - """ - Triggers a new scraping job for Autotrader. - """ - global scrape_status - if scrape_status.get("status") == "running": - raise HTTPException(status_code=409, detail="A scraping job is already in progress.") - - autotrader_url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/all-cars/cars-under-10000") # Default to a common search if not set - headless_str = os.getenv("HEADLESS_BROWSER", "True") - headless = headless_str.lower() == "true" - scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - - try: - scrape_timeout = int(scrape_timeout_str) - except ValueError: - scrape_timeout = 120000 # Default timeout if parsing fails - logger.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. 
Using default {scrape_timeout}ms.") - - job = crud.create_scrape_job(db) - scrape_status.update({ - "job_id": job.id, - "status": "pending", - "message": f"Scraping job {job.id} initiated for URL: {autotrader_url}", - "last_run_time": job.timestamp.isoformat(), - "duration_seconds": None, - "results_count": 0, - "error_message": None - }) - - # Pass job_id to the background task - background_tasks.add_task(run_scraping_task, job.id, autotrader_url, headless, scrape_timeout) - - logger.info(f"Scraping job {job.id} queued for URL: {autotrader_url}") - return job - -@app.post("/api/v1/listings/ingest", response_model=schemas.CarListing, status_code=201) -async def ingest_listing(payload: schemas.CarListingCreate, db: Session = Depends(get_db)): - """ - Ingests a new car listing into the database. - This endpoint is useful for manually adding or testing data. - """ - # Check if listing with this URL already exists to prevent duplicates, - # though the database constraint should also handle this. - db_listing = crud.get_car_listing_by_url(db, url=str(payload.url)) - if db_listing: - raise HTTPException(status_code=400, detail="Listing with this URL already exists.") - - # The job_id in CarListingCreate might be problematic if this is a direct ingest - # not tied to a specific scrape job. For now, we'll assume it's provided or - # we could adjust the schema/logic if direct ingestion shouldn't have a job_id. - # For testing, we might need to create a dummy job or adjust schema. - # Let's assume for now a valid job_id is provided or handle it if not. - if not payload.job_id: - # Create a dummy job or handle as per requirements for listings not tied to a job - # For simplicity, let's assume job_id is optional in the schema for this use case - # or a default/placeholder job_id is used. - # For this test, the payload includes job_id, so we'll proceed. - # If CarListingCreate schema requires job_id, this endpoint needs to handle it. - # For now, let's assume it's provided in the payload. - pass - - try: - created_listing = crud.create_car_listing(db=db, listing=payload) - return created_listing - except Exception as e: - logger.error(f"Error ingesting listing: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") - - -@app.get("/scrape/status", response_model=schemas.ScrapeJob) # Using ScrapeJob schema for better structure -async def get_current_scrape_status(db: Session = Depends(get_db)): - """ - Returns the status of the current or last scraping job. - """ - global scrape_status - if scrape_status.get("job_id"): - job = crud.get_scrape_job(db, scrape_status["job_id"]) - if job: - # Update status from DB if available, otherwise use in-memory for simplicity - # A more robust system might always fetch from DB or use a proper job queue status - return job - return scrape_status # Fallback to in-memory status if job not found or not started - -@app.get("/scrape/jobs/", response_model=List[schemas.ScrapeJob]) -async def read_jobs(skip: int = 0, limit: int = 10, db: Session = Depends(get_db)): - """ - Retrieve all scrape jobs. - """ - jobs = crud.get_all_scrape_jobs(db, skip=skip, limit=limit) - return jobs - -@app.get("/scrape/jobs/{job_id}/results", response_model=List[schemas.CarListing]) -async def read_job_results(job_id: int, skip: int = 0, limit: int = 10, db: Session = Depends(get_db)): - """ - Retrieve results for a specific scrape job. 
- """ - job = crud.get_scrape_job(db, job_id=job_id) - if job is None: - raise HTTPException(status_code=404, detail="Job not found") - listings = crud.get_listings_for_job(db, job_id=job_id, skip=skip, limit=limit) - return listings - -@app.get("/") -async def read_root(): - return {"message": "AutoTrader Scraper API is running!"} - -# This is for local development if you run `python app/main.py` -# Uvicorn will be started by Procfile in production environments like Heroku -if __name__ == "__main__": - # Ensure tables are created before starting the app if they don't exist - # This is useful for local development but might be handled differently in production - from .database import create_tables - create_tables() - - # Get port from environment variable or default to 8000 - port = int(os.getenv("PORT", "8000")) - uvicorn.run(app, host="0.0.0.0", port=port) - -# Remove the old main.py content if it exists in the root directory -# This is now handled by app/main.py -# Ensure Procfile points to app.main:app or similar based on your directory structure -# e.g., web: uvicorn app.main:app --host=0.0.0.0 --port=${PORT:-8000} -# (Assuming app.py is moved to app/main.py) -# If app.py remains in root, then Procfile is fine. - -# The `models.Base.metadata.create_all(bind=engine)` should ideally be called once, -# perhaps in main.py or a startup script, not every time database.py is imported. -# For simplicity in this single-file app structure, it's often put there. -# If app.py is the main entry point for uvicorn, it's a good place. -# For Render, buildCommand in render.yaml can also handle migrations/table creation. - -# Let's ensure the imports are correct considering the file structure -# If main.py is in root and imports from app/, it should be `from app import crud, models, schemas, scraper` -# If this file is app/main.py, then `from . import crud, models, schemas, scraper` is correct. -# The prompt implies this file is app/main.py. 
diff --git a/app/models.py b/app/models.py deleted file mode 100644 index b0d4e5d..0000000 --- a/app/models.py +++ /dev/null @@ -1,45 +0,0 @@ -from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship -from datetime import datetime - -Base = declarative_base() - -class ScrapeResult(Base): - __tablename__ = "scrape_results" - - id = Column(Integer, primary_key=True, index=True) - url = Column(String, index=True) - title = Column(String) - price = Column(String, nullable=True) # Store as string to handle variations like 'Contact Seller' - mileage = Column(String, nullable=True) # Store as string to handle non-numeric values - vin = Column(String, nullable=True, unique=True) - images = Column(JSON, nullable=True) # Store list of image URLs - scraped_at = Column(DateTime) - details = Column(JSON, nullable=True) # Store other details as JSON - -class ScrapeJob(Base): - __tablename__ = "scrape_jobs" - - id = Column(Integer, primary_key=True, index=True) - timestamp = Column(DateTime, default=datetime.utcnow) - status = Column(String, default="pending") # e.g., pending, running, completed, failed - results_count = Column(Integer, default=0) - error_message = Column(String, nullable=True) - -class ScrapedData(Base): - __tablename__ = "scraped_data" - - id = Column(Integer, primary_key=True, index=True) - job_id = Column(Integer, ForeignKey("scrape_jobs.id")) - platform = Column(String) # e.g., 'autotrader', 'cars.com' - url = Column(String, unique=True, index=True) - title = Column(String, nullable=True) - price = Column(String, nullable=True) - mileage = Column(String, nullable=True) - vin = Column(String, nullable=True, index=True) - image_urls = Column(JSON, nullable=True) # List of image URLs - raw_data = Column(JSON, nullable=True) # Full raw data if needed - scraped_at = Column(DateTime, default=datetime.utcnow) - - job = relationship("ScrapeJob") diff --git a/app/schemas.py b/app/schemas.py deleted file mode 100644 index 2ee0d8d..0000000 --- a/app/schemas.py +++ /dev/null @@ -1,41 +0,0 @@ -from pydantic import BaseModel, HttpUrl -from typing import List, Optional, Dict, Any -from datetime import datetime - -class CarListingBase(BaseModel): - url: HttpUrl - title: Optional[str] = None - price: Optional[str] = None # Keep as string to handle variations - mileage: Optional[str] = None # Keep as string - vin: Optional[str] = None - image_urls: Optional[List[HttpUrl]] = [] - raw_data: Optional[Dict[str, Any]] = {} # For any other unstructured data - -class CarListingCreate(CarListingBase): - platform: str - job_id: int - -class CarListing(CarListingBase): - id: int - platform: str - job_id: int - scraped_at: datetime - - class Config: - orm_mode = True - -class ScrapeJobBase(BaseModel): - pass - -class ScrapeJobCreate(ScrapeJobBase): - pass - -class ScrapeJob(ScrapeJobBase): - id: int - timestamp: datetime - status: str - results_count: int = 0 - error_message: Optional[str] = None - - class Config: - orm_mode = True diff --git a/app/scraper.py b/app/scraper.py deleted file mode 100644 index b946ecb..0000000 --- a/app/scraper.py +++ /dev/null @@ -1,373 +0,0 @@ -import asyncio -import logging -# import os # No longer needed for getenv in main -import datetime # Keep for now, might be used in data processing -from playwright.async_api import async_playwright -# Required for main test function -from config import AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT -# DATABASE_URL is used 
by database.py, SessionLocal will pick it up via config - -# Assuming database.py is in the same directory or accessible in PYTHONPATH -from database import get_db, CarListing, SessionLocal # Added SessionLocal for main example -from sqlalchemy.orm import Session -from datetime import datetime # Ensure datetime is imported directly - -# Configure basic logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -from stealth_utils import apply_stealth_js # Import new stealth utility -# from playwright_stealth import stealth_async # Commenting out old stealth - -class AutoTraderScraper: - """Scraper for AutoTrader private party listings using Playwright.""" - - def __init__(self, source_name: str = "autotrader"): - """ - Initializes the AutoTraderScraper. - Args: - source_name (str): Name of the source platform. - """ - self.source_name = source_name - # Potentially load other configs from a config file or env vars here - # For example: self.base_url = "https://www.autotrader.com/cars-for-sale/private-seller" - - async def get_private_listings(self, autotrader_url: str, headless: bool, timeout: int = 120000) -> list[dict]: - """ - Scrapes private party listings from AutoTrader using Playwright. - - Args: - autotrader_url (str): The starting URL for scraping AutoTrader private listings. - headless (bool): Whether to run the browser in headless mode. - timeout (int): Maximum time in milliseconds for page operations. - - Returns: - list[dict]: A list of dictionaries, where each dictionary represents a scraped vehicle listing. - """ - listings_data = [] - browser = None - - launch_options = { - "headless": headless, - "args": [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-infobars', - '--window-position=0,0', - '--ignore-certificate-errors', - '--ignore-certificate-errors-spki-list', - # '--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"' # User agent is set in context - '--disable-gpu' # Already there but keep - ], - # "channel": "chrome" # This might require full Chrome install, trying without first to see if args help - } - - # Try with 'msedge' or 'chrome' if default chromium fails and they are available - # For now, stick to chromium and args. If 'channel' is needed, it's a bigger setup change. - - async with async_playwright() as p: - try: - # browser = await p.chromium.launch(**launch_options) # Default chromium - # Let's try specifying channel, assuming it might use a locally installed Chrome if available, or a Playwright-managed one. - # This is a common suggestion if the default Playwright Chromium build is too easily detected. - # If "chrome" channel is not found by Playwright, it will error. - try: - browser = await p.chromium.launch( - **launch_options, - channel="chrome" # Attempt to use a branded Chrome build - ) - logging.info("Attempting to launch with channel='chrome'") - except Exception as e_channel: - logging.warning(f"Failed to launch with channel='chrome' ({e_channel}). 
Falling back to default Playwright Chromium.") - # Remove channel from launch_options if it failed - launch_options_no_channel = launch_options.copy() - if "channel" in launch_options_no_channel: # Should not be needed based on above structure but good practice - del launch_options_no_channel["channel"] - browser = await p.chromium.launch(**launch_options_no_channel) - logging.info("Launched with default Playwright Chromium.") - - - context = await browser.new_context( - user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36', # A fairly common user agent - java_script_enabled=True, - ) - context.set_default_navigation_timeout(timeout) - context.set_default_timeout(timeout) - - page = await context.new_page() - await page.set_viewport_size({"width": 1920, "height": 1080}) - - # Apply custom JS stealth - await apply_stealth_js(page) - - logging.info(f"Navigating to {autotrader_url}") - await page.goto(autotrader_url, wait_until="domcontentloaded", timeout=timeout) # Reverted to domcontentloaded - - title = await page.title() - logging.info(f"Page title: {title}") - - if "unavailable" in title.lower() or "block" in title.lower() or "access denied" in title.lower(): - logging.critical(f"Failed to load AutoTrader listings page. Blocked by website. Title: {title}") - await browser.close() # Ensure browser is closed before returning - return [] - - # Using speculative selectors for AutoTrader - # Main container for listings: 'div[data-qaid="cntnr-lstng-main"]' (this might be too broad or incorrect) - # A more specific item selector might be needed, e.g., an article or a div with a specific class. - # For now, let's assume individual listing cards can be found with a selector like: - # "div.inventory-listing" or "div[data-cmp='inventoryListing']" - these are common patterns. - # The provided example 'div[data-qaid="cntnr-lstng-main"]' seems like it might be a single container FOR ALL listings. - # Let's try a more specific (but still guessed) selector for individual listing items. - # A common pattern is items within a list or grid. Let's try to find items: - # This selector is a **GUESS** based on common AutoTrader structures. - listing_item_selector = "div[data-cmp='inventoryListing']" # GUESS - - # Fallback if the primary guess doesn't work, try another common pattern - # listing_item_selector_fallback = "div.inventory-listing.new-listing.stub" # Another GUESS - - # await page.wait_for_selector(listing_item_selector, timeout=15000) # Wait for items to appear - - listing_containers = await page.query_selector_all(listing_item_selector) - - # if not listing_containers: - # logging.info(f"No listings found with primary selector '{listing_item_selector}'. 
Trying fallback...") - # listing_containers = await page.query_selector_all(listing_item_selector_fallback) - - logging.info(f"Found {len(listing_containers)} potential listing containers using selector '{listing_item_selector}'.") - - processed_count = 0 - # first_container_processed_for_html_dump = False # REMOVE HTML DUMP FLAG - for i, container in enumerate(listing_containers): - url_path = None - title_text = "N/A" # Default to N/A - price_text = "N/A" # Default to N/A - mileage_text = "N/A" # Default to N/A (as it's not reliably on card) - listing_url = None - - try: - logging.debug(f"Processing container {i+1}/{len(listing_containers)}") - - # Attempt to get Title - title_el = await container.query_selector("h2[data-cmp='subheading']") # Updated selector from HTML dump - if title_el: - raw_title_text = await title_el.inner_text() - title_text = raw_title_text.strip() if raw_title_text else "N/A" - - # Attempt to get URL from parent of title_el - # Playwright's query_selector does not directly support xpath like "ancestor::a". - # A common structure is
<a href="..."><h2>...</h2></a> or <div ...><a href="...">...</a></div>
- # We can try to find 'a' that contains this h2, or assume the 'a[data-cmp="link"]' is the one. - - # Let's use the a[data-cmp="link"] which was identified as containing the title h2 - parent_link_el = await container.query_selector("a[data-cmp='link']") - if parent_link_el: - url_path = await parent_link_el.get_attribute("href") - else: # Fallback if the above structure isn't found - logging.warning(f"Could not find parent a[data-cmp='link'] for title in listing {i+1}") - else: - logging.warning(f"Title not found with h2[data-cmp='subheading'] for listing {i+1}.") - - # Fallback or alternative for URL if not found via title's parent link - if not url_path: - url_el_alt = await container.query_selector("a[data-cmp='relLnk']") # Keep this fallback - if url_el_alt: - url_path = await url_el_alt.get_attribute("href") - - if not url_path: # Last resort for URL - first_a = await container.query_selector("a[href]") # Broadest fallback - if first_a: - url_path = await first_a.get_attribute("href") - - if not url_path: - logging.warning(f"Could not extract URL for listing {i+1} (Title: {title_text}). Skipping.") - continue - - if not url_path.startswith(('http://', 'https://')): - listing_url = f"https://www.autotrader.com{url_path}" - else: - listing_url = url_path - - # Attempt to get Price - price_el = await container.query_selector("div[data-cmp='firstPrice']") # Updated selector - if price_el: - raw_price_text = await price_el.inner_text() - price_text = raw_price_text.replace('$', '').replace(',', '').strip() if raw_price_text else "N/A" - else: - # Fallback for price (e.g. .first-price class directly) - price_el_fallback = await container.query_selector(".first-price") - if price_el_fallback: - raw_price_text = await price_el_fallback.inner_text() - price_text = raw_price_text.replace('$', '').replace(',', '').strip() if raw_price_text else "N/A" - else: - logging.warning(f"Price not found for listing {listing_url}") - price_text = "N/A" - - - # Mileage - Set to N/A as it's not reliably on the card from previous findings - mileage_text = "N/A" - # logging.info(f"Mileage not scraped from listing card for {listing_url} (by design for now).") - - vin_text = None - - listing_data = { - "listing_url": listing_url, - "title": title_text, # Already defaults to N/A or has value - "price": price_text, # Already defaults to N/A or has value - "mileage": mileage_text, # Is N/A - "vin": vin_text, - "source_name": self.source_name, - "data_points": { - "page_title_at_scrape": title # page's title, not listing's - } - } - listings_data.append(listing_data) - processed_count += 1 - logging.info(f"Successfully processed listing: {title_text[:50]}... URL: {listing_url}") - - except Exception as e: - logging.error(f"Error processing listing container {i+1} for URL {listing_url if listing_url else 'Unknown'}: {e}", exc_info=True) - continue - - logging.info(f"Successfully processed {processed_count} out of {len(listing_containers)} listing containers.") - - except Exception as e: - logging.error(f"An error occurred during Playwright scraping phase: {e}", exc_info=True) - finally: - if browser: - logging.info("Closing browser.") - await browser.close() - - return listings_data - - -async def scrape_autotrader_data(autotrader_url: str, headless: bool = True, timeout: int = 120000) -> list[dict]: - """ - High-level function to scrape data from AutoTrader. - Initializes the scraper and calls its scraping method. - - Args: - autotrader_url (str): The URL to scrape. 
- headless (bool): Whether to run the browser in headless mode. - timeout (int): Timeout for scraping operations in milliseconds. - - Returns: - list[dict]: A list of scraped listing data. - """ - scraper = AutoTraderScraper() - listings = await scraper.get_private_listings(autotrader_url=autotrader_url, headless=headless, timeout=timeout) - return listings - - -async def scrape_autotrader_and_update_db(db: Session, autotrader_url: str, headless: bool, scrape_timeout: int): - """ - Scrapes listings from AutoTrader and updates the database. - - Args: - db (Session): The SQLAlchemy database session. - autotrader_url (str): The URL to scrape. - headless (bool): Whether to run the browser in headless mode. - scrape_timeout (int): Timeout for scraping operations in milliseconds. - - Returns: - dict: A status dictionary with counts of added, updated, and scraped listings. - """ - logging.info(f"Starting scrape and update for URL: {autotrader_url}") - - try: - listings_data = await scrape_autotrader_data( - autotrader_url=autotrader_url, - headless=headless, - timeout=scrape_timeout - ) - except Exception as e: - logging.error(f"Failed to scrape data from {autotrader_url}: {e}", exc_info=True) - return {"status": "error", "message": f"Scraping failed: {e}"} - - added_count = 0 - updated_count = 0 - scraped_count = len(listings_data) - - for listing_data in listings_data: - source_url = listing_data.get('listing_url') # Renamed from 'url' to 'listing_url' in dummy data - if not source_url: - logging.warning(f"Scraped item missing 'listing_url': {listing_data.get('title')}. Skipping.") - continue - - try: - existing_listing = db.query(CarListing).filter(CarListing.source_url == source_url).first() - - if existing_listing: - # Placeholder for update logic - # existing_listing.extracted_at = datetime.utcnow() - # existing_listing.data_points = {k: v for k, v in listing_data.items() if k != 'listing_url'} - # # Update other fields like price if necessary - # db.add(existing_listing) # Not strictly necessary if only mutable fields changed and session tracks - updated_count += 1 - logging.info(f"Listing at {source_url} already exists. 
Marked for update (placeholder).") - else: - new_listing = CarListing( - platform="autotrader", - extracted_at=datetime.utcnow(), - source_url=source_url, - # Ensure data_points stores everything else from listing_data - data_points={k: v for k, v in listing_data.items() if k != 'listing_url'} - ) - db.add(new_listing) - added_count += 1 - logging.info(f"New listing added from {source_url}") - except Exception as e: - logging.error(f"Error processing listing {source_url} for DB: {e}", exc_info=True) - # Decide if you want to rollback here or continue with other listings - - try: - db.commit() - logging.info("Database changes committed.") - except Exception as e: - logging.error(f"Database commit failed: {e}", exc_info=True) - db.rollback() - return {"status": "error", "message": f"DB commit failed: {e}", "added": 0, "updated": 0, "scraped_count": scraped_count} - - status_summary = { - "status": "success", - "added": added_count, - "updated": updated_count, - "scraped_count": scraped_count - } - logging.info(f"DB update summary: {status_summary}") - return status_summary - -async def main(): - # Use settings from config.py - # url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") - # headless_str = os.getenv("HEADLESS_BROWSER", "True") - # headless = headless_str.lower() == "true" - # scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - # try: - # scrape_timeout = int(scrape_timeout_str) - # except ValueError: - # logging.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. Defaulting to 120000ms.") - # scrape_timeout = 120000 - - # from database import SessionLocal # Already imported at the top - db: Session = SessionLocal() # SessionLocal now uses DATABASE_URL from config.py via database.py - try: - logging.info(f"Starting scraper and DB update for URL: {AUTOTRADER_URL}, Headless: {HEADLESS_BROWSER}, Timeout: {SCRAPE_TIMEOUT}ms") - stats = await scrape_autotrader_and_update_db( - db=db, - autotrader_url=AUTOTRADER_URL, - headless=HEADLESS_BROWSER, - scrape_timeout=SCRAPE_TIMEOUT - ) - logging.info(f"Scraping and DB update completed: {stats}") - except Exception as e: - logging.error(f"Error during scraping and DB update in main: {e}", exc_info=True) - finally: - logging.info("Closing DB session in main.") - db.close() - -if __name__ == "__main__": - # To run this: - # 1. Ensure Playwright browsers are installed: `playwright install chromium` - # 2. Set environment variables if needed (AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT) - # 3. Uncomment the line below - asyncio.run(main()) - # pass # Keep it passive for now, to be run manually when needed diff --git a/config.py b/config.py deleted file mode 100644 index 44148ca..0000000 --- a/config.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from dotenv import load_dotenv - -# Load environment variables from .env file if it exists -# This is useful for local development. 
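[Editor's note] The module deleted below read each variable at import time as loose module globals. Its replacement, `src/config.py`, is only lightly touched by this patch and its body is not reproduced here; judging by the names used throughout (`settings.HEADLESS`, `settings.PROXY_SERVER`, and so on), a minimal stand-in would look something like the following — the exact types and defaults are assumptions based on the documented environment variables:

```python
import os
from dataclasses import dataclass
from dotenv import load_dotenv

load_dotenv()  # pick up a local .env file, as the old config.py did

@dataclass(frozen=True)
class Settings:
    DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///./vehicle_data.db")
    API_HOST: str = os.getenv("API_HOST", "127.0.0.1")
    API_PORT: int = int(os.getenv("API_PORT", "8000"))
    HEADLESS: bool = os.getenv("HEADLESS", "true").lower() == "true"
    BROWSER_TIMEOUT: int = int(os.getenv("BROWSER_TIMEOUT", "60000"))  # ms
    PAGE_DELAY: int = int(os.getenv("PAGE_DELAY", "5000"))  # ms
    MIN_DELAY_BETWEEN_ACTIONS: float = float(os.getenv("MIN_DELAY_BETWEEN_ACTIONS", "2.5"))
    MAX_LISTINGS_PER_SESSION: int = int(os.getenv("MAX_LISTINGS_PER_SESSION", "25"))
    PROXY_SERVER: str | None = os.getenv("PROXY_SERVER") or None
    PROXY_USERNAME: str | None = os.getenv("PROXY_USERNAME") or None
    PROXY_PASSWORD: str | None = os.getenv("PROXY_PASSWORD") or None

settings = Settings()
```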
-load_dotenv() - -# Database Configuration -DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite:///./data/vehicle_tracker.db") - -# Scraper Configuration -AUTOTRADER_URL: str = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") -SCRAPE_TIMEOUT: int = int(os.getenv("SCRAPE_TIMEOUT", "120000")) # Milliseconds -HEADLESS_BROWSER: bool = os.getenv("HEADLESS_BROWSER", "True").lower() == "true" - -# API Configuration (if any specific ones are needed later) -# Example: API_HOST: str = os.getenv("API_HOST", "0.0.0.0") -# Example: API_PORT: int = int(os.getenv("API_PORT", "8000")) - -# Logging Configuration (can also be added here if more complex) -LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").upper() - -# Ensure critical URLs have a scheme for robustness -if not AUTOTRADER_URL.startswith(("http://", "https://")): - # This print statement is for immediate feedback during startup/import. - # In a pure library, side effects on import are sometimes discouraged, - # but for an application's main config, it's often acceptable. - print(f"Warning: AUTOTRADER_URL ('{AUTOTRADER_URL}') did not have a scheme, prepended https://.") - AUTOTRADER_URL = "https://" + AUTOTRADER_URL - print(f"Corrected AUTOTRADER_URL: {AUTOTRADER_URL}") - - -# Example of how to handle SQLite connect_args based on config -DB_CONNECT_ARGS: dict = {"check_same_thread": False} if DATABASE_URL.startswith("sqlite") else {} diff --git a/database.py b/database.py deleted file mode 100644 index 58a11dc..0000000 --- a/database.py +++ /dev/null @@ -1,26 +0,0 @@ -from sqlalchemy import Column, Integer, String, DateTime, JSON, create_engine -from sqlalchemy.orm import declarative_base, sessionmaker, Session -from config import DATABASE_URL, DB_CONNECT_ARGS # Import from config - -# Use imported configuration -engine = create_engine(DATABASE_URL, connect_args=DB_CONNECT_ARGS) - -SessionLocal = sessionmaker(bind=engine, autoflush=False) -Base = declarative_base() - -class CarListing(Base): - __tablename__ = "listings" - id = Column(Integer, primary_key=True, index=True) - platform = Column(String) - extracted_at = Column(DateTime) - source_url = Column(String, unique=True) - data_points = Column(JSON) - -Base.metadata.create_all(bind=engine) - -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() diff --git a/main.py b/main.py new file mode 100644 index 0000000..fb37fc5 --- /dev/null +++ b/main.py @@ -0,0 +1,93 @@ +import asyncio +import uvicorn +from contextlib import asynccontextmanager +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +import logging +from datetime import datetime + +from src.database import create_db_tables +from src.api.routes import router as api_v1_router +from src.config import settings +from src.automation.browser_sim import run_autotrader_scraper_example_standalone + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[logging.StreamHandler()] +) +logger = logging.getLogger(__name__) + +@asynccontextmanager +async def lifespan(app: FastAPI): + logger.info("\ud83d\ude80 Starting Educational Vehicle Tracker API...") + await create_db_tables() + logger.info("\ud83d\udcca Database tables checked/created.") + yield + logger.info("\ud83d\udd1b Shutting down Educational Vehicle Tracker API.") + +app = FastAPI( + title="Educational Vehicle Tracker", + description="An educational system for learning web automation and data pipeline architecture, now with real 
scraping.", + version="1.1.0", + lifespan=lifespan +) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(api_v1_router, prefix="/api/v1") + +@app.get("/", include_in_schema=False) +async def root_redirect_to_docs(): + from fastapi.responses import RedirectResponse + return RedirectResponse(url="/docs") + +@app.get("/health", summary="Health Check") +async def health_check(): + return {"status": "healthy", "service": "vehicle-tracker-api", "timestamp": datetime.utcnow()} + +async def run_standalone_scrape_cli(): + logger.info("\ud83c\udf3d Running Standalone AutoTrader Scraper Example from CLI") + print("=" * 40) + try: + await run_autotrader_scraper_example_standalone() + logger.info("\u2705 Standalone scraper example completed successfully!") + except Exception as e: + logger.error(f"\u274c Standalone scraper example failed: {e}", exc_info=True) + +if __name__ == "__main__": + import sys + print_startup_message = True + if len(sys.argv) > 1: + if sys.argv[1] == "scrape_test": + print_startup_message = False + asyncio.run(run_standalone_scrape_cli()) + elif sys.argv[1] == "create_tables": + print_startup_message = False + asyncio.run(create_db_tables()) + logger.info("Database tables creation process finished.") + else: + logger.warning(f"Unknown command: {sys.argv[1]}") + print("\ud83d\udd0d Usage: python main.py [scrape_test | create_tables]") + if print_startup_message: + logger.info("\ud83c\udf93 Educational Vehicle Tracking System - API Server Mode") + print("=" * 50) + logger.info(f"API Host: {settings.API_HOST}") + logger.info(f"API Port: {settings.API_PORT}") + logger.info(f"Database: {settings.DATABASE_URL}") + logger.info(f"Max Listings per Session (Scrape): {settings.MAX_LISTINGS_PER_SESSION}") + logger.info(f"Playwright Headless: {settings.HEADLESS}") + print("=" * 50) + uvicorn.run( + "main:app", + host=settings.API_HOST, + port=settings.API_PORT, + reload=True, + log_level="info" + ) diff --git a/render.yaml b/render.yaml index c0e7ffa..f755e35 100644 --- a/render.yaml +++ b/render.yaml @@ -5,15 +5,25 @@ services: buildCommand: | pip install -r requirements.txt playwright install chromium - startCommand: uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000} + startCommand: uvicorn main:app --host 0.0.0.0 --port ${PORT:-8000} envVars: - key: PYTHON_VERSION - value: 3.11 # Or your desired Python version + value: 3.11 - key: DATABASE_URL - generateValue: true # For Render PostgreSQL, or set manually for SQLite/external DB - - key: AUTOTRADER_URL - value: "https://www.autotrader.com/cars-for-sale/private-seller" # Example - - key: SCRAPE_TIMEOUT - value: 120000 # Example: 120 seconds - - key: HEADLESS_BROWSER + generateValue: true + - key: HEADLESS value: "True" + - key: BROWSER_TIMEOUT + value: "60000" + - key: PAGE_DELAY + value: "5000" + - key: MIN_DELAY_BETWEEN_ACTIONS + value: "2.5" + - key: MAX_LISTINGS_PER_SESSION + value: "25" + - key: PROXY_SERVER + value: "" + - key: PROXY_USERNAME + value: "" + - key: PROXY_PASSWORD + value: "" diff --git a/requirements.txt b/requirements.txt index 2e8a221..b450ec4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ -aiofiles -asyncpg -fastapi -playwright -playwright-stealth -python-dotenv -sqlalchemy[asyncio] -uvicorn[standard] +fastapi==0.104.1 +uvicorn==0.24.0 +sqlalchemy==2.0.23 +aiosqlite==0.19.0 +playwright==1.40.0 +python-dotenv==1.0.0 +pydantic==2.5.0 diff --git a/app/__init__.py b/src/__init__.py 
similarity index 100% rename from app/__init__.py rename to src/__init__.py diff --git a/src/api/__init__.py b/src/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/api/routes.py b/src/api/routes.py new file mode 100644 index 0000000..612fb3f --- /dev/null +++ b/src/api/routes.py @@ -0,0 +1,237 @@ +from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, and_, exists, update +from datetime import datetime +import json +from typing import List +import logging + +from src.database import get_db, AsyncSessionLocal +from src.models.vehicle import ( + VehicleListing, + VehicleListingCreate, + VehicleListingResponse, + SearchFilters +) +from src.automation.browser_sim import AutoTraderScraper +from src.config import settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +async def scrape_and_store_task(search_url: str, max_listings: int, source_site_name: str = "autotrader"): + logger.info(f"Background task started: Scraping {source_site_name} URL: {search_url} for max {max_listings} listings.") + created_count = 0 + updated_count = 0 + failed_count = 0 + processed_urls = set() + if source_site_name.lower() == "autotrader": + ScraperClass = AutoTraderScraper + else: + logger.error(f"Unsupported source site: {source_site_name}") + return + async with ScraperClass() as scraper: + scraped_listings_pydantic = await scraper.scrape_listings( + search_url=search_url, + max_listings_to_fetch=max_listings + ) + if not scraped_listings_pydantic: + logger.info(f"No listings returned from {source_site_name} scraper for URL: {search_url}") + return + logger.info(f"{source_site_name} scraper returned {len(scraped_listings_pydantic)} listings. 
Processing for DB storage...") + async with AsyncSessionLocal() as db_session: + for listing_data in scraped_listings_pydantic: + if not listing_data.listing_url: + logger.warning("Scraped data missing listing_url, skipping.") + failed_count += 1 + continue + if listing_data.listing_url in processed_urls: + logger.debug(f"URL {listing_data.listing_url} already processed in this run, skipping duplicate.") + continue + processed_urls.add(listing_data.listing_url) + try: + stmt = select(VehicleListing).where(VehicleListing.listing_url == listing_data.listing_url) + result = await db_session.execute(stmt) + existing_vehicle = result.scalar_one_or_none() + features_json = json.dumps(listing_data.features) if listing_data.features else None + if existing_vehicle: + logger.debug(f"Updating existing listing: {listing_data.listing_url} (ID: {existing_vehicle.id})") + update_values = {} + if listing_data.title and existing_vehicle.title != listing_data.title: + update_values['title'] = listing_data.title + if listing_data.price is not None and existing_vehicle.price != listing_data.price: + update_values['price'] = listing_data.price + if listing_data.mileage is not None and existing_vehicle.mileage != listing_data.mileage: + update_values['mileage'] = listing_data.mileage + if features_json and existing_vehicle.features != features_json: + update_values['features'] = features_json + if listing_data.photo_url and existing_vehicle.photo_url != listing_data.photo_url: + update_values['photo_url'] = listing_data.photo_url + if listing_data.location and existing_vehicle.location != listing_data.location: + update_values['location'] = listing_data.location + if listing_data.year and existing_vehicle.year != listing_data.year: + update_values['year'] = listing_data.year + if listing_data.make and existing_vehicle.make != listing_data.make: + update_values['make'] = listing_data.make + if listing_data.model and existing_vehicle.model != listing_data.model: + update_values['model'] = listing_data.model + if listing_data.trim and existing_vehicle.trim != listing_data.trim: + update_values['trim'] = listing_data.trim + update_values['is_active'] = True + update_values['last_scraped_at'] = datetime.utcnow() + if update_values: + stmt_update = update(VehicleListing).where(VehicleListing.id == existing_vehicle.id).values(**update_values) + await db_session.execute(stmt_update) + updated_count += 1 + else: + logger.debug(f"Adding new listing: {listing_data.listing_url}") + db_vehicle = VehicleListing( + listing_id_external=listing_data.listing_id_external, + title=listing_data.title, + year=listing_data.year, + make=listing_data.make, + model=listing_data.model, + trim=listing_data.trim, + price=listing_data.price, + mileage=listing_data.mileage, + listing_url=listing_data.listing_url, + photo_url=listing_data.photo_url, + features=features_json, + location=listing_data.location, + seller_type=listing_data.seller_type, + source_site=listing_data.source_site, + is_active=True, + last_scraped_at=datetime.utcnow() + ) + db_session.add(db_vehicle) + created_count += 1 + await db_session.commit() + except Exception as e: + failed_count += 1 + logger.error(f"Failed to process/store listing {listing_data.listing_url}: {e}", exc_info=True) + await db_session.rollback() + logger.info(f"Background task for {source_site_name} finished. 
Created={created_count}, Updated={updated_count}, Failed={failed_count}") + +@router.get("/", response_model=dict, include_in_schema=False) +async def api_v1_root_info(): + return { + "message": "Vehicle Tracking API - V1", + "active_endpoints": ["/vehicles", "/vehicles/search", "/vehicles/scrape", "/vehicles/{id}", "/vehicles/stats/summary"] + } + +@router.get("/vehicles/", response_model=List[VehicleListingResponse]) +async def get_all_vehicles( + skip: int = Query(0, ge=0), + limit: int = Query(settings.MAX_LISTINGS_PER_SESSION, ge=1, le=200), + db: AsyncSession = Depends(get_db), + filters: SearchFilters = Depends(), +): + query = select(VehicleListing) + conditions = [] + if filters.is_active is not None: + conditions.append(VehicleListing.is_active == filters.is_active) + if filters.make: + conditions.append(VehicleListing.make.ilike(f"%{filters.make}%")) + if filters.model: + conditions.append(VehicleListing.model.ilike(f"%{filters.model}%")) + if filters.min_year: + conditions.append(VehicleListing.year >= filters.min_year) + if filters.max_year: + conditions.append(VehicleListing.year <= filters.max_year) + if filters.min_price: + conditions.append(VehicleListing.price >= filters.min_price) + if filters.max_price: + conditions.append(VehicleListing.price <= filters.max_price) + if filters.max_mileage: + conditions.append(VehicleListing.mileage <= filters.max_mileage) + if filters.location: + conditions.append(VehicleListing.location.ilike(f"%{filters.location}%")) + if filters.seller_type: + conditions.append(VehicleListing.seller_type.ilike(f"%{filters.seller_type}%")) + if filters.source_site: + conditions.append(VehicleListing.source_site.ilike(f"%{filters.source_site}%")) + if conditions: + query = query.where(and_(*conditions)) + query = query.order_by(VehicleListing.last_scraped_at.desc(), VehicleListing.created_at.desc()) + result = await db.execute(query.offset(skip).limit(limit)) + vehicles = result.scalars().all() + response_vehicles = [] + for vehicle_db_item in vehicles: + response_vehicles.append(VehicleListingResponse.model_validate(vehicle_db_item)) + return response_vehicles + +@router.get("/vehicles/{vehicle_id}", response_model=VehicleListingResponse) +async def get_vehicle_by_id_route(vehicle_id: int, db: AsyncSession = Depends(get_db)): + query = select(VehicleListing).where(VehicleListing.id == vehicle_id) + result = await db.execute(query) + vehicle_db_item = result.scalar_one_or_none() + if not vehicle_db_item: + raise HTTPException(status_code=404, detail="Vehicle not found") + return VehicleListingResponse.model_validate(vehicle_db_item) + +@router.post("/vehicles/", response_model=VehicleListingResponse, status_code=201) +async def create_vehicle_listing_manual( + vehicle_create_data: VehicleListingCreate, + db: AsyncSession = Depends(get_db) +): + stmt_exists = select(exists().where(VehicleListing.listing_url == vehicle_create_data.listing_url)) + url_exists = await db.scalar(stmt_exists) + if url_exists: + raise HTTPException(status_code=409, detail=f"Vehicle with URL {vehicle_create_data.listing_url} already exists.") + features_json_str = json.dumps(vehicle_create_data.features) if vehicle_create_data.features else None + db_vehicle_item = VehicleListing( + **vehicle_create_data.model_dump(exclude={'features'}), + features=features_json_str, + is_active=True, + last_scraped_at=datetime.utcnow() + ) + db.add(db_vehicle_item) + await db.commit() + await db.refresh(db_vehicle_item) + return VehicleListingResponse.model_validate(db_vehicle_item) + 
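[Editor's note] For reference, a hypothetical client-side sketch of driving the routes defined in this file: `httpx` is not a project dependency (any HTTP client works the same way), the base URL assumes the default `API_HOST`/`API_PORT`, and the printed field names follow the response model used above.

```python
import asyncio
import httpx  # assumption: not in requirements.txt, used here for brevity

BASE = "http://127.0.0.1:8000/api/v1"  # assumes default host/port settings

async def main() -> None:
    async with httpx.AsyncClient(timeout=30.0) as client:
        # Queue a background scrape; the route answers 202 immediately.
        resp = await client.post(
            f"{BASE}/vehicles/scrape",
            params={
                "site_name": "autotrader",
                "search_url": "https://www.autotrader.com/cars-for-sale/private-seller",
                "max_listings": 25,
            },
        )
        print(resp.status_code, resp.json())

        # Later, page through stored listings 25 at a time.
        page = await client.get(f"{BASE}/vehicles/", params={"skip": 0, "limit": 25})
        for vehicle in page.json():
            print(vehicle["id"], vehicle["title"], vehicle.get("price"))

asyncio.run(main())
```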
+@router.post("/vehicles/scrape", status_code=202) +async def trigger_site_scrape( + background_tasks: BackgroundTasks, + site_name: str = Query("autotrader", description="Name of the site to scrape (e.g., 'autotrader')."), + search_url: str = Query(..., description="Full search URL for the specified site."), + max_listings: int = Query(settings.MAX_LISTINGS_PER_SESSION, description="Maximum listings to fetch from this scrape.", ge=1, le=100) +): + logger.info(f"Received request to scrape {site_name} URL: {search_url} for max {max_listings} listings.") + if site_name.lower() not in ["autotrader"]: + raise HTTPException(status_code=400, detail=f"Scraping for site '{site_name}' is not supported.") + background_tasks.add_task(scrape_and_store_task, search_url, max_listings, site_name) + return {"message": f"{site_name.capitalize()} scraping task accepted and started in the background for URL: {search_url}"} + +@router.delete("/vehicles/{vehicle_id}", status_code=200) +async def delete_vehicle_listing(vehicle_id: int, db: AsyncSession = Depends(get_db)): + query = select(VehicleListing).where(VehicleListing.id == vehicle_id) + result = await db.execute(query) + vehicle_db_item = result.scalar_one_or_none() + if not vehicle_db_item: + raise HTTPException(status_code=404, detail="Vehicle not found") + await db.delete(vehicle_db_item) + await db.commit() + return {"message": "Vehicle deleted successfully"} + +@router.get("/vehicles/stats/summary", response_model=dict) +async def get_vehicle_listing_stats(db: AsyncSession = Depends(get_db)): + from sqlalchemy import func as sql_func + make_query = select(VehicleListing.make, sql_func.count(VehicleListing.id).label('count'))\ + .where(VehicleListing.make.isnot(None)).group_by(VehicleListing.make).order_by(sql_func.count(VehicleListing.id).desc()) + make_result = await db.execute(make_query) + make_stats = [{"make": row[0], "count": row[1]} for row in make_result.all()] + year_query = select(VehicleListing.year, sql_func.avg(VehicleListing.price).label('avg_price'), sql_func.count(VehicleListing.id).label('count'))\ + .where(VehicleListing.year.isnot(None)).group_by(VehicleListing.year).order_by(VehicleListing.year.desc()) + year_result = await db.execute(year_query) + year_stats = [{"year": row[0], "avg_price": round(row[1], 2) if row[1] else 0.0, "count": row[2]} for row in year_result.all()] + total_query = select(sql_func.count(VehicleListing.id)) + total_count = await db.scalar(total_query) or 0 + active_query = select(sql_func.count(VehicleListing.id)).where(VehicleListing.is_active == True) + active_count = await db.scalar(active_query) or 0 + return { + "total_listings_in_db": total_count, + "active_listings": active_count, + "by_make": make_stats, + "by_year_with_avg_price": year_stats + } diff --git a/src/automation/__init__.py b/src/automation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automation/browser_sim.py b/src/automation/browser_sim.py new file mode 100644 index 0000000..c93f204 --- /dev/null +++ b/src/automation/browser_sim.py @@ -0,0 +1,420 @@ +import asyncio +import json +import re +import random +from typing import List, Dict, Optional +from playwright.async_api import async_playwright, Page, Browser, PlaywrightException, Locator +from urllib.parse import urljoin, urlparse, parse_qs +from datetime import datetime +import logging +import hashlib + +from src.config import settings +from src.models.vehicle import VehicleListingCreate + +logger = logging.getLogger(__name__) + +class AutoTraderScraper: 
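+ """Playwright-based scraper for AutoTrader search-result pages.
+
+ Use as an async context manager: __aenter__ launches Chromium (routing
+ through the optional PROXY_SERVER settings when configured) and __aexit__
+ closes the browser and stops the Playwright instance.
+ """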
+ def __init__(self): + self.browser: Optional[Browser] = None + self.playwright_instance: Optional[async_playwright] = None + self.base_action_delay = settings.MIN_DELAY_BETWEEN_ACTIONS + self.page_load_delay = settings.PAGE_DELAY / 1000 + + async def __aenter__(self): + logger.info("Initializing AutoTrader Scraper...") + self.playwright_instance = await async_playwright().start() + try: + proxy_cfg = None + if settings.PROXY_SERVER: + proxy_cfg = {"server": settings.PROXY_SERVER} + if settings.PROXY_USERNAME and settings.PROXY_PASSWORD: + proxy_cfg["username"] = settings.PROXY_USERNAME + proxy_cfg["password"] = settings.PROXY_PASSWORD + + self.browser = await self.playwright_instance.chromium.launch( + headless=settings.HEADLESS, + proxy=proxy_cfg, + args=[ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-infobars', + '--window-position=0,0', + '--ignore-certificate-errors', + '--ignore-certificate-errors-spki-list', + '--disable-blink-features=AutomationControlled', + '--disable-dev-shm-usage' + ], + timeout=settings.BROWSER_TIMEOUT + ) + logger.info(f"Browser launched (Headless: {settings.HEADLESS})") + except PlaywrightException as e: + logger.error(f"Failed to launch browser: {e}") + if self.playwright_instance: + await self.playwright_instance.stop() + raise + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + logger.info("Closing AutoTrader Scraper resources...") + if self.browser and self.browser.is_connected(): + try: + await self.browser.close() + logger.info("Browser closed.") + except PlaywrightException as e: + logger.error(f"Error closing browser: {e}") + if self.playwright_instance: + try: + await self.playwright_instance.stop() + logger.info("Playwright instance stopped.") + except Exception as e: + logger.error(f"Error stopping Playwright: {e}") + if exc_type: + logger.error(f"Exception occurred during scraping: {exc_val}", exc_info=(exc_type, exc_val, exc_tb)) + + async def _apply_stealth_measures(self, page: Page): + logger.info("Applying stealth measures to page...") + user_agents = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" + ] + await page.set_extra_http_headers({"User-Agent": random.choice(user_agents)}) + await page.add_init_script(""" + Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); + Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en', 'en-GB'] }); + const pluginCount = Math.floor(Math.random() * 3) + 1; + Object.defineProperty(navigator, 'plugins', { + get: () => Array(pluginCount).fill(null).map((_, i) => ({ name: `Plugin ${i}`, filename: `plugin${i}.dll`, description: `Mock plugin ${i}` })) + }); + const mimeTypeCount = Math.floor(Math.random() * 3) + 1; + Object.defineProperty(navigator, 'mimeTypes', { + get: () => Array(mimeTypeCount).fill(null).map((_, i) => ({ type: `application/x-mimetype${i}`, suffixes: `m${i}`, description: `Mock mimetype ${i}` })) + }); + const getParameter = WebGLRenderingContext.prototype.getParameter; + WebGLRenderingContext.prototype.getParameter = function(parameter) { + if (parameter === 37445) return 'Intel Open Source Technology Center'; + if (parameter === 37446) 
return 'Mesa DRI Intel(R) Iris Xe Graphics (TGL GT2)'; + return getParameter.apply(this, arguments); + }; + const originalQuery = window.navigator.permissions.query; + window.navigator.permissions.query = (parameters) => ( + parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters) + ); + try { Date.prototype.getTimezoneOffset = function() { return -Math.floor(Math.random() * 8 + 3) * 60; }; } catch (e) {} + """) + viewports = [{"width": 1920, "height": 1080}, {"width": 1366, "height": 768}, {"width": 1440, "height": 900}, {"width": 2560, "height": 1440}] + await page.set_viewport_size(random.choice(viewports)) + logger.info("Stealth measures applied.") + + async def _human_like_delay(self, min_delay: Optional[float] = None, max_delay: Optional[float] = None): + min_d = min_delay if min_delay is not None else self.base_action_delay + max_d = max_delay if max_delay is not None else self.base_action_delay + 2.0 + delay = random.uniform(min_d, max_d) + logger.debug(f"Waiting {delay:.2f} seconds...") + await asyncio.sleep(delay) + + async def _human_like_scroll(self, page: Page, scroll_attempts=7): + logger.info(f"Performing human-like scrolling: {scroll_attempts} attempts...") + previous_scroll_height = -1.0 + for i in range(scroll_attempts): + current_scroll_height = float(await page.evaluate("document.body.scrollHeight")) + if abs(current_scroll_height - previous_scroll_height) < 1.0 and i > 0: + logger.info(f"Scroll attempt {i+1}: Reached end of scrollable content or no new content loaded.") + break + scroll_amount = await page.evaluate(f"Math.random() * window.innerHeight * 0.7 + window.innerHeight * 0.3") + await page.evaluate(f"window.scrollBy(0, {scroll_amount})") + await self._human_like_delay(min_delay=0.8, max_delay=2.2) + previous_scroll_height = current_scroll_height + logger.info("Scrolling to bottom one last time...") + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await self._human_like_delay(min_delay=2.0, max_delay=3.5) + logger.info("Scrolling finished.") + + def _extract_listing_id_from_url(self, url: str) -> Optional[str]: + if not url: + return None + try: + parsed_url = urlparse(url) + query_params = parse_qs(parsed_url.query) + if 'listingId' in query_params: + return query_params['listingId'][0] + path_parts = [part for part in parsed_url.path.split('/') if part] + if 'vehicle' in path_parts: + vehicle_idx = path_parts.index('vehicle') + if vehicle_idx + 1 < len(path_parts): + return path_parts[vehicle_idx+1] + for part in reversed(path_parts): + if part.isdigit() and len(part) > 5: + return part + except Exception as e: + logger.warning(f"Could not parse structured listing ID from URL {url}: {e}") + logger.debug(f"No structured ID found, hashing URL for ID: {url}") + return hashlib.md5(url.encode()).hexdigest()[:16] + + def _parse_title_details(self, title_str: str) -> Dict: + details = {'year': None, 'make': None, 'model': None, 'trim': None} + if not title_str: + return details + original_title = title_str + year_match = re.search(r'\b(19[89]\d|20[0-2]\d|2030)\b', title_str) + if year_match: + details['year'] = int(year_match.group(1)) + title_str = title_str.replace(year_match.group(1), "", 1).strip() + title_str = re.sub(r'^(Used|New|Certified Pre-Owned|CPO)\s+', '', title_str, flags=re.IGNORECASE).strip() + parts = title_str.split(maxsplit=3) + if len(parts) > 0: + details['make'] = parts[0] + if len(parts) > 1: + details['model'] = parts[1] + if len(parts) > 2: + 
details['trim'] = " ".join(parts[2:]) + logger.debug(f"Parsed title details: {details} from original title: '{original_title}'") + return details + + async def _extract_listing_data(self, listing_element: Locator, page_url: str) -> Optional[VehicleListingCreate]: + data_dict: Dict[str, any] = {} + listing_html_for_debug = "N/A (HTML not captured)" + try: + link_el_selectors = [ + 'a[data-cmp="inventoryListingCardLink"]', + 'a[data-testid="srp-list-item-link"]', + 'a[href*="vehicledetails.xhtml?listingId="]', + 'h2 > a', + 'h3 > a' + ] + raw_href = None + for selector in link_el_selectors: + link_el = listing_element.locator(selector).first + if await link_el.count(): + raw_href = await link_el.get_attribute("href", timeout=1500) + if raw_href: + break + if not raw_href: + logger.warning("No primary link found for a listing card. Skipping.") + return None + data_dict['listing_url'] = urljoin(page_url, raw_href) + + title_el_selectors = ["h2[data-cmp*='title']", "h3[data-cmp*='title']", "div[data-cmp='displayName'] h2", "h2", "h3"] + raw_title = "Title Not Found" + for selector in title_el_selectors: + title_el = listing_element.locator(selector).first + if await title_el.count(): + try: + raw_title = await title_el.text_content(timeout=1500) + if raw_title and raw_title.strip(): + break + except PlaywrightException: + continue + data_dict['title'] = raw_title.strip() + title_details = self._parse_title_details(data_dict['title']) + data_dict.update(title_details) + + price_selectors = [ + "span[data-cmp='pricingSection'] .text-size-lg-3", + "span[data-cmp='pricingSection']", + ".pricing-section .first-price", + "span[class*='price']", "div[class*='price']" + ] + for selector in price_selectors: + price_el = listing_element.locator(selector).filter(has_text=re.compile(r"\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?")).first + if await price_el.count(): + try: + price_text = await price_el.text_content(timeout=1000) + cleaned_price = re.sub(r'[^\d.]', '', price_text) + if cleaned_price and cleaned_price != '.': + data_dict['price'] = float(cleaned_price) + break + except PlaywrightException: + continue + + mileage_selectors = [ + "div[data-cmp='listUnstyled'] li:has-text('miles')", + "div.item-vehicle-mileage", + "div[class*='mileage']", "span[class*='mileage']" + ] + for selector in mileage_selectors: + mileage_el = listing_element.locator(selector).filter(has_text=re.compile(r"[\d,]+\s*mi(?:les)?", re.IGNORECASE)).first + if await mileage_el.count(): + try: + mileage_text = await mileage_el.text_content(timeout=1000) + match = re.search(r'([\d,]+)\s*mi', mileage_text, re.IGNORECASE) + if match: + data_dict['mileage'] = int(re.sub(r',', '', match.group(1))) + break + except PlaywrightException: + continue + + photo_selectors = [ + 'img[data-cmp="responsiveImage"]', + 'img[data-testid="srp-list-item-image"]', + '.srp-img-container img', + 'img[alt*="vehicle image"]' + ] + for selector in photo_selectors: + photo_el = listing_element.locator(selector).first + if await photo_el.count(): + try: + src = await photo_el.get_attribute("src", timeout=1000) + if src and not src.startswith('data:image'): + data_dict['photo_url'] = urljoin(page_url, src) + break + except PlaywrightException: + continue + + features_list = [] + feature_selectors = ["ul[class*='features'] li", "div[data-cmp='pill']", ".item-特色 span"] + for selector in feature_selectors: + feature_elements = await listing_element.locator(selector).all() + for fe_el in feature_elements[:5]: + try: + f_text = await fe_el.text_content(timeout=500) + if 
f_text and len(f_text.strip()) > 2 and len(f_text.strip()) < 50: + features_list.append(f_text.strip()) + except PlaywrightException: + continue + if features_list: + break + data_dict['features'] = list(set(features_list)) + + location_selectors = ["div[data-cmp*='location']", "div.text-gray-dark.text-truncate", ".item-location"] + for selector in location_selectors: + location_el = listing_element.locator(selector).first + if await location_el.count(): + try: + loc_text = await location_el.text_content(timeout=1000) + data_dict['location'] = loc_text.replace('Located in', '').replace('Dealership Location', '').strip() + if data_dict['location']: + break + except PlaywrightException: + continue + + data_dict['listing_id_external'] = self._extract_listing_id_from_url(data_dict['listing_url']) + + return VehicleListingCreate(**data_dict) + + except PlaywrightException as e: + try: + listing_html_for_debug = await listing_element.evaluate("element => element.outerHTML", timeout=1000) + except Exception: + pass # element may be detached; keep the placeholder so the original error is not masked + logger.error(f"Playwright error extracting data from a listing card: {e}. HTML: {listing_html_for_debug[:500]}...") + except Exception as e: + try: + listing_html_for_debug = await listing_element.evaluate("element => element.outerHTML", timeout=1000) + except Exception: + pass + logger.error(f"General error extracting data from a listing card: {e}. HTML: {listing_html_for_debug[:500]}...") + return None + + async def scrape_listings(self, search_url: str, max_listings_to_fetch: int) -> List[VehicleListingCreate]: + if not self.browser or not self.browser.is_connected(): + logger.error("Browser not initialized or not connected. Call within async context manager.") + return [] + page: Optional[Page] = None + processed_listings: List[VehicleListingCreate] = [] + try: + context = await self.browser.new_context( + java_script_enabled=True, + accept_downloads=False, + locale='en-US' + ) + page = await context.new_page() + await self._apply_stealth_measures(page) + logger.info(f"Navigating to search URL: {search_url}") + await page.goto(search_url, wait_until="domcontentloaded", timeout=settings.BROWSER_TIMEOUT) + await self._human_like_delay(min_delay=self.page_load_delay, max_delay=self.page_load_delay + 3.0) + cookie_selectors = ['#onetrust-accept-btn-handler', 'button:has-text("Accept All Cookies")'] + for cs_selector in cookie_selectors: + try: + cookie_button = page.locator(cs_selector).first + if await cookie_button.is_visible(timeout=3000): + await cookie_button.click(timeout=5000, delay=random.uniform(0.3,0.8)*1000) + logger.info(f"Clicked cookie banner: {cs_selector}") + await self._human_like_delay(1.5, 2.5) + break + except PlaywrightException: + logger.debug(f"Cookie banner not found/visible or clickable with: {cs_selector}") + await self._human_like_scroll(page, scroll_attempts=settings.MAX_LISTINGS_PER_SESSION // 5 or 5) + listing_card_selectors = [ + "article[data-cmp='inventoryListing']", + "div[data-testid='srp-listing-item']", + "div[data-cmp='inventorySpotlightListingCard']", + ".inventory-listing", + "div[class*='srp-results'] div[class*='vehicle-card']" + ] + all_card_elements_locators = [] + for selector in listing_card_selectors: + elements_on_page = await page.locator(selector).count() + if elements_on_page > 0: + logger.info(f"Found {elements_on_page} cards with selector '{selector}'") + all_card_elements_locators.append(page.locator(selector)) + if selector in ["article[data-cmp='inventoryListing']", "div[data-testid='srp-listing-item']"]: + break + final_card_locator = None + if all_card_elements_locators: + final_card_locator =
all_card_elements_locators[0] + if not final_card_locator: + logger.warning(f"No listing cards found on page: {search_url}.") + try: + await page.screenshot(path=f"debug_no_listings_{datetime.now():%Y%m%d%H%M%S}.png") + except Exception as e: + logger.error(f"Failed to save screenshot: {e}") + return [] + num_cards_on_page = await final_card_locator.count() + logger.info(f"Total listing cards to process with chosen locator: {num_cards_on_page}") + for i in range(num_cards_on_page): + if len(processed_listings) >= max_listings_to_fetch: + logger.info(f"Reached max listings to fetch: {max_listings_to_fetch}") + break + card_element = final_card_locator.nth(i) + logger.info(f"Processing card {i+1}/{num_cards_on_page}...") + try: + if not await card_element.is_visible(timeout=3000): + await card_element.scroll_into_view_if_needed(timeout=5000) + await self._human_like_delay(0.5, 1.0) + except PlaywrightException as e: + logger.warning(f"Card {i+1} not visible or could not scroll into view, skipping: {e}") + continue + listing_data = await self._extract_listing_data(card_element, page.url) + if listing_data: + processed_listings.append(listing_data) + logger.info(f"Successfully extracted: {listing_data.title[:60]}... ({listing_data.listing_id_external})") + else: + logger.warning(f"Failed to extract complete data from card {i+1}.") + await self._human_like_delay() + except PlaywrightException as e: + logger.error(f"A Playwright error occurred during scraping session for {search_url}: {e}", exc_info=True) + if page: + try: + await page.screenshot(path=f"error_pw_session_{datetime.now():%Y%m%d%H%M%S}.png") + except Exception as se: + logger.error(f"Failed to save error screenshot: {se}") + except Exception as e: + logger.error(f"An unexpected error occurred during scraping session for {search_url}: {e}", exc_info=True) + if page: + try: + await page.screenshot(path=f"error_unexpected_session_{datetime.now():%Y%m%d%H%M%S}.png") + except Exception as se: + logger.error(f"Failed to save error screenshot: {se}") + finally: + if page: + try: + await page.close() + except PlaywrightException as e: + logger.error(f"Error closing page: {e}") + if 'context' in locals() and context: + try: + await context.close() + except PlaywrightException as e: + logger.error(f"Error closing context: {e}") + logger.info(f"Scraping session for {search_url} finished. Extracted {len(processed_listings)} listings.") + return processed_listings[:max_listings_to_fetch] + +async def run_autotrader_scraper_example_standalone(): + example_search_url = "https://www.autotrader.com/cars-for-sale/by-owner/all-states?searchRadius=0&sortBy=datelistedDESC&numRecords=25" + max_to_get = settings.MAX_LISTINGS_PER_SESSION + async with AutoTraderScraper() as scraper: + results = await scraper.scrape_listings(example_search_url, max_listings_to_fetch=max_to_get) + if results: + logger.info(f"\n--- Scraped {len(results)} AutoTrader Listings (Standalone Example Run) ---") + for i, listing in enumerate(results): + logger.info(f"{i+1}. 
ID_Ext: {listing.listing_id_external} - {listing.title} ({listing.year} {listing.make} {listing.model}) - Price: ${listing.price if listing.price else 'N/A'}") + else: + logger.info("No listings were extracted in the standalone example run.") + return results diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..94a9b77 --- /dev/null +++ b/src/config.py @@ -0,0 +1,21 @@ +import os +from dotenv import load_dotenv + +load_dotenv() + +class Settings: + DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///./default_vehicle_data.db") + HEADLESS: bool = os.getenv("HEADLESS", "true").lower() == "true" + BROWSER_TIMEOUT: int = int(os.getenv("BROWSER_TIMEOUT", "60000")) + PAGE_DELAY: int = int(os.getenv("PAGE_DELAY", "5000")) + MIN_DELAY_BETWEEN_ACTIONS: float = float(os.getenv("MIN_DELAY_BETWEEN_ACTIONS", "2.5")) + API_HOST: str = os.getenv("API_HOST", "127.0.0.1") + API_PORT: int = int(os.getenv("API_PORT", "8000")) + MAX_LISTINGS_PER_SESSION: int = int(os.getenv("MAX_LISTINGS_PER_SESSION", "25")) + + # Proxy configuration + PROXY_SERVER: str | None = os.getenv("PROXY_SERVER") + PROXY_USERNAME: str | None = os.getenv("PROXY_USERNAME") + PROXY_PASSWORD: str | None = os.getenv("PROXY_PASSWORD") + +settings = Settings() diff --git a/src/database.py b/src/database.py new file mode 100644 index 0000000..e42f8f1 --- /dev/null +++ b/src/database.py @@ -0,0 +1,28 @@ +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker +from src.config import settings + +DATABASE_URL = settings.DATABASE_URL + +engine = create_async_engine( + DATABASE_URL, + echo=False, + future=True +) + +AsyncSessionLocal = async_sessionmaker( + bind=engine, + class_=AsyncSession, + expire_on_commit=False +) + +async def create_db_tables(): + from src.models.vehicle import Base + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + +async def get_db(): + async with AsyncSessionLocal() as session: + try: + yield session + finally: + await session.close() diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/vehicle.py b/src/models/vehicle.py new file mode 100644 index 0000000..09bd717 --- /dev/null +++ b/src/models/vehicle.py @@ -0,0 +1,84 @@ +from sqlalchemy import Column, Integer, String, Float, DateTime, Text, Boolean +from sqlalchemy.orm import declarative_base +from sqlalchemy.sql import func +from pydantic import BaseModel, Field +from typing import Optional, List +from datetime import datetime + +Base = declarative_base() + +class VehicleListing(Base): + __tablename__ = "vehicle_listings" + + id = Column(Integer, primary_key=True, index=True) + listing_id_external = Column(String, index=True, unique=False, nullable=True) + title = Column(String, nullable=False) + year = Column(Integer, index=True, nullable=True) + make = Column(String, index=True, nullable=True) + model = Column(String, index=True, nullable=True) + trim = Column(String, nullable=True) + price = Column(Float, index=True, nullable=True) + mileage = Column(Integer, index=True, nullable=True) + listing_url = Column(Text, unique=True, nullable=False, index=True) + photo_url = Column(Text, nullable=True) + features = Column(Text, nullable=True) + location = Column(String, nullable=True) + seller_type = Column(String, default="private", nullable=True) + source_site = Column(String, default="autotrader", nullable=True) + created_at = Column(DateTime, default=func.now()) + updated_at = 
Column(DateTime, default=func.now(), onupdate=func.now()) + last_scraped_at = Column(DateTime, default=func.now(), onupdate=func.now()) + is_active = Column(Boolean, default=True, index=True) + +class VehicleListingCreate(BaseModel): + listing_id_external: Optional[str] = None + title: str + year: Optional[int] = None + make: Optional[str] = None + model: Optional[str] = None + trim: Optional[str] = None + price: Optional[float] = None + mileage: Optional[int] = None + listing_url: str + photo_url: Optional[str] = None + features: Optional[List[str]] = Field(default_factory=list) + location: Optional[str] = None + seller_type: Optional[str] = "private" + source_site: Optional[str] = "autotrader" + +class VehicleListingResponse(BaseModel): + id: int + listing_id_external: Optional[str] = None + title: str + year: Optional[int] = None + make: Optional[str] = None + model: Optional[str] = None + trim: Optional[str] = None + price: Optional[float] = None + mileage: Optional[int] = None + listing_url: str + photo_url: Optional[str] = None + features: Optional[List[str]] = Field(default_factory=list) + location: Optional[str] = None + seller_type: Optional[str] = None + source_site: Optional[str] = None + created_at: datetime + updated_at: datetime + last_scraped_at: datetime + is_active: bool + + class Config: + from_attributes = True + +class SearchFilters(BaseModel): + make: Optional[str] = None + model: Optional[str] = None + min_year: Optional[int] = None + max_year: Optional[int] = None + min_price: Optional[float] = None + max_price: Optional[float] = None + max_mileage: Optional[int] = None + location: Optional[str] = None + seller_type: Optional[str] = None + source_site: Optional[str] = None + is_active: Optional[bool] = True From 4664e8ff3956472ad33974a697d56e08f496d4d6 Mon Sep 17 00:00:00 2001 From: hellothere012 Date: Wed, 4 Jun 2025 08:13:53 -0700 Subject: [PATCH 3/4] chore: remove unused files --- package.json | 13 ------------- stealth_utils.py | 30 ------------------------------ 2 files changed, 43 deletions(-) delete mode 100644 package.json delete mode 100644 stealth_utils.py diff --git a/package.json b/package.json deleted file mode 100644 index 5f62ed5..0000000 --- a/package.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "name": "autotrader-scraper", - "version": "1.0.0", - "description": "A FastAPI application for scraping Autotrader data.", - "main": "index.js", - "scripts": { - "start": "python app.py", - "test": "echo \"Error: no test specified\" && exit 1" - }, - "keywords": ["fastapi", "autotrader", "scraper", "web-scraping"], - "author": "", - "license": "ISC" -} diff --git a/stealth_utils.py b/stealth_utils.py deleted file mode 100644 index e956687..0000000 --- a/stealth_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -import logging - -async def apply_stealth_js(page): - """ - Applies various JavaScript injections to make Playwright less detectable. 
- """ - try: - # Pass the User-Agent test (though Playwright usually handles this well) - # user_agent = await page.evaluate("() => navigator.userAgent") - # await page.set_extra_http_headers({'User-Agent': user_agent.replace("HeadlessChrome", "Chrome")}) # Example - - # Pass the WebGL test - await page.add_init_script("(() => { const getParameter = WebGLRenderingContext.prototype.getParameter; WebGLRenderingContext.prototype.getParameter = function(parameter) { if (parameter === 37445) { return 'Intel Open Source Technology Center'; } if (parameter === 37446) { return 'Mesa DRI Intel(R) Ivybridge Mobile '; } return getParameter(parameter); }; })()") - - # Pass the Chrome test - await page.add_init_script("(() => { Object.defineProperty(navigator, 'webdriver', { get: () => false }); })()") - await page.add_init_script("(() => { window.chrome = { runtime: {}, loadTimes: function(){}, csi: function(){} }; })()") - - # Pass the Permissions test - await page.add_init_script("(() => { const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) => ( parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters) ); })()") - - # Pass the Plugins Length test - await page.add_init_script("(() => { Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] }); })()") - - # Pass the Languages test - await page.add_init_script("(() => { Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); })()") - - logging.info("Applied JavaScript stealth techniques from stealth_utils.") - except Exception as e: - logging.error(f"Error applying stealth JS from stealth_utils: {e}", exc_info=True) From f21a2335b277111dd996460cbfba3bf134853e4f Mon Sep 17 00:00:00 2001 From: hellothere012 Date: Wed, 4 Jun 2025 08:17:16 -0700 Subject: [PATCH 4/4] chore: remove unused files --- .env.example | 37 +-- Procfile | 2 +- README.md | 48 +++- app.py | 123 ---------- app/crud.py | 50 ---- app/database.py | 25 -- app/main.py | 265 --------------------- app/models.py | 45 ---- app/schemas.py | 41 ---- app/scraper.py | 373 ------------------------------ config.py | 34 --- database.py | 26 --- main.py | 93 ++++++++ package.json | 13 -- render.yaml | 26 ++- requirements.txt | 15 +- {app => src}/__init__.py | 0 src/api/__init__.py | 0 src/api/routes.py | 237 +++++++++++++++++++ src/automation/__init__.py | 0 src/automation/browser_sim.py | 420 ++++++++++++++++++++++++++++++++++ src/config.py | 21 ++ src/database.py | 28 +++ src/models/__init__.py | 0 src/models/vehicle.py | 84 +++++++ stealth_utils.py | 30 --- 26 files changed, 975 insertions(+), 1061 deletions(-) delete mode 100644 app.py delete mode 100644 app/crud.py delete mode 100644 app/database.py delete mode 100644 app/main.py delete mode 100644 app/models.py delete mode 100644 app/schemas.py delete mode 100644 app/scraper.py delete mode 100644 config.py delete mode 100644 database.py create mode 100644 main.py delete mode 100644 package.json rename {app => src}/__init__.py (100%) create mode 100644 src/api/__init__.py create mode 100644 src/api/routes.py create mode 100644 src/automation/__init__.py create mode 100644 src/automation/browser_sim.py create mode 100644 src/config.py create mode 100644 src/database.py create mode 100644 src/models/__init__.py create mode 100644 src/models/vehicle.py delete mode 100644 stealth_utils.py diff --git a/.env.example b/.env.example index f7b9b12..4744016 100644 --- a/.env.example +++ b/.env.example 
@@ -1,21 +1,22 @@ -# Autotrader Configuration -AUTOTRADER_URL="https://www.autotrader.com/cars-for-sale/by-owner/fullerton-ca?zip=92833&searchRadius=50&numRecords=100&sortBy=priceDESC" +# Database Configuration +DATABASE_URL=sqlite+aiosqlite:///./vehicle_data.db -# Webshare Proxy Configuration -PROXY_HOST="your_webshare_proxy_host" -PROXY_PORT="your_webshare_proxy_port" -WEBSHARE_USERNAME="your_webshare_username" -WEBSHARE_PASSWORD="your_webshare_password" +# Browser Configuration +HEADLESS=true +BROWSER_TIMEOUT=60000 +PAGE_DELAY=5000 +MIN_DELAY_BETWEEN_ACTIONS=2.5 -# Database Configuration -# For local SQLite (default): -DATABASE_URL="sqlite+aiosqlite:///./data/vehicle_tracker.db" -DATABASE_TYPE="sqlite" -# Example for PostgreSQL: -# DATABASE_URL="postgresql+asyncpg://user:password@host:port/dbname" -# DATABASE_TYPE="postgresql" +# API Configuration +API_HOST=127.0.0.1 +API_PORT=8000 + +# Scraping Limits +MAX_LISTINGS_PER_SESSION=25 -# Application Configuration -LOG_LEVEL="INFO" -HEADLESS_BROWSER="True" # For Playwright -SCRAPE_TIMEOUT="120000" # For Playwright page/navigation timeout (milliseconds) +# Optional Proxy Configuration +# If using rotating proxies (e.g., Webshare), uncomment and provide the proxy URL. +# Example: http://username:password@proxyhost:port +# PROXY_SERVER= +# PROXY_USERNAME= +# PROXY_PASSWORD= diff --git a/Procfile b/Procfile index 3972b54..84b6dde 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: uvicorn app.main:app --host=0.0.0.0 --port=${PORT:-8000} +web: uvicorn main:app --host=0.0.0.0 --port=${PORT:-8000} diff --git a/README.md b/README.md index 894b33c..7525e16 100644 --- a/README.md +++ b/README.md @@ -1 +1,47 @@ -# vehicle-tracker \ No newline at end of file +# Vehicle Tracker + +This project provides a FastAPI-based API and web scraper for collecting and storing vehicle listings from sites like AutoTrader. It uses Playwright for scraping and SQLAlchemy with SQLite for storage. + +## Usage + +1. Install dependencies: + ```bash + pip install -r requirements.txt + playwright install chromium + ``` +2. Copy `.env.example` to `.env` and adjust settings as needed. +3. Run the API: + ```bash + uvicorn main:app --reload + ``` +4. Trigger scraping via the `/api/v1/vehicles/scrape` endpoint. + +The scraper can also be run standalone: +```bash +python main.py scrape_test +``` + +## Environment Variables + +Set the following variables in a `.env` file or your deployment environment: + +| Variable | Description | Default | +| --- | --- | --- | +| `DATABASE_URL` | Database connection URL | `sqlite+aiosqlite:///./vehicle_data.db` | +| `HEADLESS` | Run the browser in headless mode | `true` | +| `BROWSER_TIMEOUT` | Playwright launch timeout (ms) | `60000` | +| `PAGE_DELAY` | Base delay after page loads (ms) | `5000` | +| `MIN_DELAY_BETWEEN_ACTIONS` | Delay between scraping actions (s) | `2.5` | +| `API_HOST` | Host for the FastAPI server | `127.0.0.1` | +| `API_PORT` | Port for the FastAPI server | `8000` | +| `MAX_LISTINGS_PER_SESSION` | Maximum listings fetched per scrape | `25` | +| `PROXY_SERVER` | *(Optional)* Proxy URL for Playwright | - | +| `PROXY_USERNAME` | *(Optional)* Proxy username | - | +| `PROXY_PASSWORD` | *(Optional)* Proxy password | - | + +### Pagination + +The `/api/v1/vehicles/` endpoint accepts `skip` and `limit` query parameters to paginate results. +Example: `/api/v1/vehicles/?skip=25&limit=25`. 
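+ +For a quick local check (assuming the API is running on the default `127.0.0.1:8000`), the same pagination can be exercised with `curl`: + +```bash +# first page of 25 results +curl "http://127.0.0.1:8000/api/v1/vehicles/?skip=0&limit=25" +# second page +curl "http://127.0.0.1:8000/api/v1/vehicles/?skip=25&limit=25" +```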
+ + diff --git a/app.py b/app.py deleted file mode 100644 index 8fa8500..0000000 --- a/app.py +++ /dev/null @@ -1,123 +0,0 @@ -import logging -# import os # No longer needed for getenv in background task -import asyncio -from fastapi import FastAPI, Depends, BackgroundTasks -from pydantic import BaseModel -from typing import Dict -from datetime import datetime -from database import CarListing, get_db, Session, SessionLocal -from scraper import scrape_autotrader_and_update_db -from fastapi.middleware.cors import CORSMiddleware -from config import AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT, LOG_LEVEL # Import from config - -# Configure basic logging using LOG_LEVEL from config -# Ensure this is called only once. If FastAPI/Uvicorn also configures logging, -# this might need adjustment or to be handled by the logger instance directly. -# For now, assume this is the primary logging config. -logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s', force=True) -# Added force=True to ensure this config takes precedence if uvicorn also tries to set basicConfig. - -app = FastAPI() -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_methods=["*"], - allow_headers=["*"], -) - -class CarListingRaw(BaseModel): - platform: str - extracted_at: datetime - source_url: str - data_points: Dict - -@app.post("/api/v1/listings/ingest") -async def ingest_listing(payload: CarListingRaw, db: Session = Depends(get_db)): - listing = CarListing( - platform=payload.platform, - extracted_at=payload.extracted_at, - source_url=payload.source_url, - data_points=payload.data_points - ) - db.add(listing) - db.commit() - db.refresh(listing) - return {"status": "saved", "listing_id": listing.id} - -@app.get("/") -def read_root(): - return {"message": "πŸš— Car Tracker API is running!"} - -# Global variable to store scraping status -scrape_status = { - "last_run_time": None, - "status": "idle", # States: idle, running, success, error - "message": "", - "added": 0, - "updated": 0, - "scraped_count": 0 -} - -# Background task wrapper -async def _background_scraper_task_wrapper(): - global scrape_status - db_task_session: Session = SessionLocal() - logging.info("Background scraper task started.") - scrape_status["status"] = "running" - scrape_status["message"] = "Scraping in progress..." - scrape_status["last_run_time"] = datetime.utcnow().isoformat() - scrape_status["added"] = 0 # Reset counts for current run - scrape_status["updated"] = 0 - scrape_status["scraped_count"] = 0 - - try: - # Use imported config values - # autotrader_url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") - # headless_str = os.getenv("HEADLESS_BROWSER", "True") - # headless = headless_str.lower() == "true" - # scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - # try: - # scrape_timeout = int(scrape_timeout_str) - # except ValueError: - # logging.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. Defaulting to 120000ms.") - # scrape_timeout = 120000 - - logging.info(f"Background task using URL: {AUTOTRADER_URL}, Headless: {HEADLESS_BROWSER}, Timeout: {SCRAPE_TIMEOUT}ms") - - result = await scrape_autotrader_and_update_db( - db=db_task_session, - autotrader_url=AUTOTRADER_URL, - headless=HEADLESS_BROWSER, - scrape_timeout=SCRAPE_TIMEOUT - ) - - if result.get("status") == "success": - scrape_status["status"] = "success" - scrape_status["message"] = "Scraping completed successfully." 
- scrape_status["added"] = result.get("added", 0) - scrape_status["updated"] = result.get("updated", 0) - scrape_status["scraped_count"] = result.get("scraped_count", 0) - else: - scrape_status["status"] = "error" - scrape_status["message"] = result.get("message", "Scraping failed with an unknown error.") - - logging.info(f"Background scraper task completed: {result}") - - except Exception as e: - logging.error(f"Error in background scraper task: {e}", exc_info=True) - scrape_status["status"] = "error" - scrape_status["message"] = str(e) - finally: - db_task_session.close() - logging.info("Background scraper DB session closed.") - -@app.post("/api/v1/scrape/autotrader") -async def trigger_autotrader_scrape(background_tasks: BackgroundTasks): - if scrape_status["status"] == "running": - return {"message": "AutoTrader scraping job is already running."} - background_tasks.add_task(_background_scraper_task_wrapper) - return {"message": "AutoTrader scraping job started in the background."} - -@app.get("/api/v1/scrape/status") -async def get_scrape_status(): - return scrape_status diff --git a/app/crud.py b/app/crud.py deleted file mode 100644 index fb8708b..0000000 --- a/app/crud.py +++ /dev/null @@ -1,50 +0,0 @@ -from sqlalchemy.orm import Session -from . import models, schemas -from datetime import datetime - -def get_car_listing_by_url(db: Session, url: str): - return db.query(models.ScrapedData).filter(models.ScrapedData.url == url).first() - -def create_car_listing(db: Session, listing: schemas.CarListingCreate): - db_listing = models.ScrapedData( - job_id=listing.job_id, - platform=listing.platform, - url=str(listing.url), # Ensure HttpUrl is converted to string - title=listing.title, - price=listing.price, - mileage=listing.mileage, - vin=listing.vin, - image_urls=listing.image_urls, # Assuming image_urls is already a list of strings or compatible JSON - raw_data=listing.raw_data, - scraped_at=datetime.utcnow() - ) - db.add(db_listing) - db.commit() - db.refresh(db_listing) - return db_listing - -def create_scrape_job(db: Session) -> models.ScrapeJob: - db_job = models.ScrapeJob(timestamp=datetime.utcnow(), status="pending") - db.add(db_job) - db.commit() - db.refresh(db_job) - return db_job - -def update_scrape_job_status(db: Session, job_id: int, status: str, results_count: int = 0, error_message: str = None): - db_job = db.query(models.ScrapeJob).filter(models.ScrapeJob.id == job_id).first() - if db_job: - db_job.status = status - db_job.results_count = results_count - db_job.error_message = error_message - db.commit() - db.refresh(db_job) - return db_job - -def get_scrape_job(db: Session, job_id: int): - return db.query(models.ScrapeJob).filter(models.ScrapeJob.id == job_id).first() - -def get_all_scrape_jobs(db: Session, skip: int = 0, limit: int = 100): - return db.query(models.ScrapeJob).order_by(models.ScrapeJob.timestamp.desc()).offset(skip).limit(limit).all() - -def get_listings_for_job(db: Session, job_id: int, skip: int = 0, limit: int = 100): - return db.query(models.ScrapedData).filter(models.ScrapedData.job_id == job_id).offset(skip).limit(limit).all() diff --git a/app/database.py b/app/database.py deleted file mode 100644 index bf32154..0000000 --- a/app/database.py +++ /dev/null @@ -1,25 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker -import os - -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/vehicle_tracker.db") - -engine_args = {} -if 
DATABASE_URL.startswith("sqlite"): - engine_args["connect_args"] = {"check_same_thread": False} - -engine = create_engine(DATABASE_URL, **engine_args) -SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - -Base = declarative_base() - -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() - -def create_tables(): - Base.metadata.create_all(bind=engine) diff --git a/app/main.py b/app/main.py deleted file mode 100644 index 4817f15..0000000 --- a/app/main.py +++ /dev/null @@ -1,265 +0,0 @@ -import logging -import os -from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks -from sqlalchemy.orm import Session -from typing import List - -from . import crud, models, schemas, scraper -from .database import SessionLocal, engine - -# Create database tables if they don't exist -models.Base.metadata.create_all(bind=engine) - -# Configure logging -LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper() -logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -app = FastAPI(title="AutoTrader Scraper API", version="1.0.0") - -# Dependency to get DB session -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() - -# Global variable to store scraping status (simple approach) -scrape_status = { - "job_id": None, - "status": "idle", # States: idle, pending, running, completed, failed - "message": "No scraping job initiated yet.", - "last_run_time": None, - "duration_seconds": None, - "results_count": 0, - "error_message": None -} - -async def run_scraping_task(job_id: int, autotrader_url: str, headless: bool, scrape_timeout: int): - """ - The actual scraping task that runs in the background. - It creates its own database session. - """ - global scrape_status - db: Session = SessionLocal() - try: - logger.info(f"Background task started for job_id: {job_id}") - crud.update_scrape_job_status(db, job_id, status="running") - scrape_status.update({ - "job_id": job_id, - "status": "running", - "message": f"Scraping from {autotrader_url}...", - "last_run_time": datetime.utcnow().isoformat(), - "duration_seconds": None, - "results_count": 0, - "error_message": None - }) - - start_time = datetime.utcnow() - - scraped_data_list = await scraper.scrape_autotrader_data( - autotrader_url=autotrader_url, - headless=headless, - timeout=scrape_timeout - ) - - end_time = datetime.utcnow() - duration = (end_time - start_time).total_seconds() - scrape_status["duration_seconds"] = round(duration, 2) - - added_count = 0 - updated_count = 0 # Placeholder for future update logic - - if not scraped_data_list: - logger.info(f"No listings found for job_id: {job_id}") - crud.update_scrape_job_status(db, job_id, status="completed", results_count=0) - scrape_status.update({ - "status": "completed", - "message": "Scraping completed. 
No new listings found or page was inaccessible.", - "results_count": 0 - }) - return - - for item_data in scraped_data_list: - # Ensure all required fields for CarListingCreate are present - listing_create = schemas.CarListingCreate( - job_id=job_id, - platform=item_data.get("source_name", "autotrader"), # Get platform from scraper or default - url=item_data.get("listing_url"), - title=item_data.get("title"), - price=item_data.get("price"), - mileage=item_data.get("mileage"), - vin=item_data.get("vin"), - image_urls=item_data.get("image_urls", []), - raw_data=item_data.get("data_points", {}) - ) - - existing_listing = crud.get_car_listing_by_url(db, str(listing_create.url)) - if existing_listing: - # For now, we just count updates. Actual update logic could be added here. - # e.g., existing_listing.price = listing_create.price - # existing_listing.extracted_at = datetime.utcnow() - updated_count += 1 - else: - crud.create_car_listing(db=db, listing=listing_create) - added_count += 1 - - crud.update_scrape_job_status(db, job_id, status="completed", results_count=added_count) - scrape_status.update({ - "status": "completed", - "message": f"Scraping finished. Added: {added_count}, Updated: {updated_count} (placeholder).", - "results_count": added_count + updated_count # Or just added_count if updates aren't really changing data - }) - logger.info(f"Background task for job_id: {job_id} completed. Added: {added_count}, Updated: {updated_count}") - - except Exception as e: - logger.error(f"Error in background scraper task for job_id {job_id}: {e}", exc_info=True) - crud.update_scrape_job_status(db, job_id, status="failed", error_message=str(e)) - scrape_status.update({ - "status": "failed", - "message": f"Error during scraping: {str(e)}", - "error_message": str(e) - }) - finally: - db.close() - logger.info(f"DB session closed for job_id: {job_id}") - - -@app.post("/scrape/", response_model=schemas.ScrapeJob, status_code=202) -async def trigger_scrape(background_tasks: BackgroundTasks, db: Session = Depends(get_db)): - """ - Triggers a new scraping job for Autotrader. - """ - global scrape_status - if scrape_status.get("status") == "running": - raise HTTPException(status_code=409, detail="A scraping job is already in progress.") - - autotrader_url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/all-cars/cars-under-10000") # Default to a common search if not set - headless_str = os.getenv("HEADLESS_BROWSER", "True") - headless = headless_str.lower() == "true" - scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - - try: - scrape_timeout = int(scrape_timeout_str) - except ValueError: - scrape_timeout = 120000 # Default timeout if parsing fails - logger.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. 
Using default {scrape_timeout}ms.") - - job = crud.create_scrape_job(db) - scrape_status.update({ - "job_id": job.id, - "status": "pending", - "message": f"Scraping job {job.id} initiated for URL: {autotrader_url}", - "last_run_time": job.timestamp.isoformat(), - "duration_seconds": None, - "results_count": 0, - "error_message": None - }) - - # Pass job_id to the background task - background_tasks.add_task(run_scraping_task, job.id, autotrader_url, headless, scrape_timeout) - - logger.info(f"Scraping job {job.id} queued for URL: {autotrader_url}") - return job - -@app.post("/api/v1/listings/ingest", response_model=schemas.CarListing, status_code=201) -async def ingest_listing(payload: schemas.CarListingCreate, db: Session = Depends(get_db)): - """ - Ingests a new car listing into the database. - This endpoint is useful for manually adding or testing data. - """ - # Check if listing with this URL already exists to prevent duplicates, - # though the database constraint should also handle this. - db_listing = crud.get_car_listing_by_url(db, url=str(payload.url)) - if db_listing: - raise HTTPException(status_code=400, detail="Listing with this URL already exists.") - - # The job_id in CarListingCreate might be problematic if this is a direct ingest - # not tied to a specific scrape job. For now, we'll assume it's provided or - # we could adjust the schema/logic if direct ingestion shouldn't have a job_id. - # For testing, we might need to create a dummy job or adjust schema. - # Let's assume for now a valid job_id is provided or handle it if not. - if not payload.job_id: - # Create a dummy job or handle as per requirements for listings not tied to a job - # For simplicity, let's assume job_id is optional in the schema for this use case - # or a default/placeholder job_id is used. - # For this test, the payload includes job_id, so we'll proceed. - # If CarListingCreate schema requires job_id, this endpoint needs to handle it. - # For now, let's assume it's provided in the payload. - pass - - try: - created_listing = crud.create_car_listing(db=db, listing=payload) - return created_listing - except Exception as e: - logger.error(f"Error ingesting listing: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") - - -@app.get("/scrape/status", response_model=schemas.ScrapeJob) # Using ScrapeJob schema for better structure -async def get_current_scrape_status(db: Session = Depends(get_db)): - """ - Returns the status of the current or last scraping job. - """ - global scrape_status - if scrape_status.get("job_id"): - job = crud.get_scrape_job(db, scrape_status["job_id"]) - if job: - # Update status from DB if available, otherwise use in-memory for simplicity - # A more robust system might always fetch from DB or use a proper job queue status - return job - return scrape_status # Fallback to in-memory status if job not found or not started - -@app.get("/scrape/jobs/", response_model=List[schemas.ScrapeJob]) -async def read_jobs(skip: int = 0, limit: int = 10, db: Session = Depends(get_db)): - """ - Retrieve all scrape jobs. - """ - jobs = crud.get_all_scrape_jobs(db, skip=skip, limit=limit) - return jobs - -@app.get("/scrape/jobs/{job_id}/results", response_model=List[schemas.CarListing]) -async def read_job_results(job_id: int, skip: int = 0, limit: int = 10, db: Session = Depends(get_db)): - """ - Retrieve results for a specific scrape job. 
- """ - job = crud.get_scrape_job(db, job_id=job_id) - if job is None: - raise HTTPException(status_code=404, detail="Job not found") - listings = crud.get_listings_for_job(db, job_id=job_id, skip=skip, limit=limit) - return listings - -@app.get("/") -async def read_root(): - return {"message": "AutoTrader Scraper API is running!"} - -# This is for local development if you run `python app/main.py` -# Uvicorn will be started by Procfile in production environments like Heroku -if __name__ == "__main__": - # Ensure tables are created before starting the app if they don't exist - # This is useful for local development but might be handled differently in production - from .database import create_tables - create_tables() - - # Get port from environment variable or default to 8000 - port = int(os.getenv("PORT", "8000")) - uvicorn.run(app, host="0.0.0.0", port=port) - -# Remove the old main.py content if it exists in the root directory -# This is now handled by app/main.py -# Ensure Procfile points to app.main:app or similar based on your directory structure -# e.g., web: uvicorn app.main:app --host=0.0.0.0 --port=${PORT:-8000} -# (Assuming app.py is moved to app/main.py) -# If app.py remains in root, then Procfile is fine. - -# The `models.Base.metadata.create_all(bind=engine)` should ideally be called once, -# perhaps in main.py or a startup script, not every time database.py is imported. -# For simplicity in this single-file app structure, it's often put there. -# If app.py is the main entry point for uvicorn, it's a good place. -# For Render, buildCommand in render.yaml can also handle migrations/table creation. - -# Let's ensure the imports are correct considering the file structure -# If main.py is in root and imports from app/, it should be `from app import crud, models, schemas, scraper` -# If this file is app/main.py, then `from . import crud, models, schemas, scraper` is correct. -# The prompt implies this file is app/main.py. 
diff --git a/app/models.py b/app/models.py deleted file mode 100644 index b0d4e5d..0000000 --- a/app/models.py +++ /dev/null @@ -1,45 +0,0 @@ -from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship -from datetime import datetime - -Base = declarative_base() - -class ScrapeResult(Base): - __tablename__ = "scrape_results" - - id = Column(Integer, primary_key=True, index=True) - url = Column(String, index=True) - title = Column(String) - price = Column(String, nullable=True) # Store as string to handle variations like 'Contact Seller' - mileage = Column(String, nullable=True) # Store as string to handle non-numeric values - vin = Column(String, nullable=True, unique=True) - images = Column(JSON, nullable=True) # Store list of image URLs - scraped_at = Column(DateTime) - details = Column(JSON, nullable=True) # Store other details as JSON - -class ScrapeJob(Base): - __tablename__ = "scrape_jobs" - - id = Column(Integer, primary_key=True, index=True) - timestamp = Column(DateTime, default=datetime.utcnow) - status = Column(String, default="pending") # e.g., pending, running, completed, failed - results_count = Column(Integer, default=0) - error_message = Column(String, nullable=True) - -class ScrapedData(Base): - __tablename__ = "scraped_data" - - id = Column(Integer, primary_key=True, index=True) - job_id = Column(Integer, ForeignKey("scrape_jobs.id")) - platform = Column(String) # e.g., 'autotrader', 'cars.com' - url = Column(String, unique=True, index=True) - title = Column(String, nullable=True) - price = Column(String, nullable=True) - mileage = Column(String, nullable=True) - vin = Column(String, nullable=True, index=True) - image_urls = Column(JSON, nullable=True) # List of image URLs - raw_data = Column(JSON, nullable=True) # Full raw data if needed - scraped_at = Column(DateTime, default=datetime.utcnow) - - job = relationship("ScrapeJob") diff --git a/app/schemas.py b/app/schemas.py deleted file mode 100644 index 2ee0d8d..0000000 --- a/app/schemas.py +++ /dev/null @@ -1,41 +0,0 @@ -from pydantic import BaseModel, HttpUrl -from typing import List, Optional, Dict, Any -from datetime import datetime - -class CarListingBase(BaseModel): - url: HttpUrl - title: Optional[str] = None - price: Optional[str] = None # Keep as string to handle variations - mileage: Optional[str] = None # Keep as string - vin: Optional[str] = None - image_urls: Optional[List[HttpUrl]] = [] - raw_data: Optional[Dict[str, Any]] = {} # For any other unstructured data - -class CarListingCreate(CarListingBase): - platform: str - job_id: int - -class CarListing(CarListingBase): - id: int - platform: str - job_id: int - scraped_at: datetime - - class Config: - orm_mode = True - -class ScrapeJobBase(BaseModel): - pass - -class ScrapeJobCreate(ScrapeJobBase): - pass - -class ScrapeJob(ScrapeJobBase): - id: int - timestamp: datetime - status: str - results_count: int = 0 - error_message: Optional[str] = None - - class Config: - orm_mode = True diff --git a/app/scraper.py b/app/scraper.py deleted file mode 100644 index b946ecb..0000000 --- a/app/scraper.py +++ /dev/null @@ -1,373 +0,0 @@ -import asyncio -import logging -# import os # No longer needed for getenv in main -import datetime # Keep for now, might be used in data processing -from playwright.async_api import async_playwright -# Required for main test function -from config import AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT -# DATABASE_URL is used 
by database.py, SessionLocal will pick it up via config - -# Assuming database.py is in the same directory or accessible in PYTHONPATH -from database import get_db, CarListing, SessionLocal # Added SessionLocal for main example -from sqlalchemy.orm import Session -from datetime import datetime # Ensure datetime is imported directly - -# Configure basic logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -from stealth_utils import apply_stealth_js # Import new stealth utility -# from playwright_stealth import stealth_async # Commenting out old stealth - -class AutoTraderScraper: - """Scraper for AutoTrader private party listings using Playwright.""" - - def __init__(self, source_name: str = "autotrader"): - """ - Initializes the AutoTraderScraper. - Args: - source_name (str): Name of the source platform. - """ - self.source_name = source_name - # Potentially load other configs from a config file or env vars here - # For example: self.base_url = "https://www.autotrader.com/cars-for-sale/private-seller" - - async def get_private_listings(self, autotrader_url: str, headless: bool, timeout: int = 120000) -> list[dict]: - """ - Scrapes private party listings from AutoTrader using Playwright. - - Args: - autotrader_url (str): The starting URL for scraping AutoTrader private listings. - headless (bool): Whether to run the browser in headless mode. - timeout (int): Maximum time in milliseconds for page operations. - - Returns: - list[dict]: A list of dictionaries, where each dictionary represents a scraped vehicle listing. - """ - listings_data = [] - browser = None - - launch_options = { - "headless": headless, - "args": [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-infobars', - '--window-position=0,0', - '--ignore-certificate-errors', - '--ignore-certificate-errors-spki-list', - # '--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"' # User agent is set in context - '--disable-gpu' # Already there but keep - ], - # "channel": "chrome" # This might require full Chrome install, trying without first to see if args help - } - - # Try with 'msedge' or 'chrome' if default chromium fails and they are available - # For now, stick to chromium and args. If 'channel' is needed, it's a bigger setup change. - - async with async_playwright() as p: - try: - # browser = await p.chromium.launch(**launch_options) # Default chromium - # Let's try specifying channel, assuming it might use a locally installed Chrome if available, or a Playwright-managed one. - # This is a common suggestion if the default Playwright Chromium build is too easily detected. - # If "chrome" channel is not found by Playwright, it will error. - try: - browser = await p.chromium.launch( - **launch_options, - channel="chrome" # Attempt to use a branded Chrome build - ) - logging.info("Attempting to launch with channel='chrome'") - except Exception as e_channel: - logging.warning(f"Failed to launch with channel='chrome' ({e_channel}). 
Falling back to default Playwright Chromium.") - # Remove channel from launch_options if it failed - launch_options_no_channel = launch_options.copy() - if "channel" in launch_options_no_channel: # Should not be needed based on above structure but good practice - del launch_options_no_channel["channel"] - browser = await p.chromium.launch(**launch_options_no_channel) - logging.info("Launched with default Playwright Chromium.") - - - context = await browser.new_context( - user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36', # A fairly common user agent - java_script_enabled=True, - ) - context.set_default_navigation_timeout(timeout) - context.set_default_timeout(timeout) - - page = await context.new_page() - await page.set_viewport_size({"width": 1920, "height": 1080}) - - # Apply custom JS stealth - await apply_stealth_js(page) - - logging.info(f"Navigating to {autotrader_url}") - await page.goto(autotrader_url, wait_until="domcontentloaded", timeout=timeout) # Reverted to domcontentloaded - - title = await page.title() - logging.info(f"Page title: {title}") - - if "unavailable" in title.lower() or "block" in title.lower() or "access denied" in title.lower(): - logging.critical(f"Failed to load AutoTrader listings page. Blocked by website. Title: {title}") - await browser.close() # Ensure browser is closed before returning - return [] - - # Using speculative selectors for AutoTrader - # Main container for listings: 'div[data-qaid="cntnr-lstng-main"]' (this might be too broad or incorrect) - # A more specific item selector might be needed, e.g., an article or a div with a specific class. - # For now, let's assume individual listing cards can be found with a selector like: - # "div.inventory-listing" or "div[data-cmp='inventoryListing']" - these are common patterns. - # The provided example 'div[data-qaid="cntnr-lstng-main"]' seems like it might be a single container FOR ALL listings. - # Let's try a more specific (but still guessed) selector for individual listing items. - # A common pattern is items within a list or grid. Let's try to find items: - # This selector is a **GUESS** based on common AutoTrader structures. - listing_item_selector = "div[data-cmp='inventoryListing']" # GUESS - - # Fallback if the primary guess doesn't work, try another common pattern - # listing_item_selector_fallback = "div.inventory-listing.new-listing.stub" # Another GUESS - - # await page.wait_for_selector(listing_item_selector, timeout=15000) # Wait for items to appear - - listing_containers = await page.query_selector_all(listing_item_selector) - - # if not listing_containers: - # logging.info(f"No listings found with primary selector '{listing_item_selector}'. 
Trying fallback...") - # listing_containers = await page.query_selector_all(listing_item_selector_fallback) - - logging.info(f"Found {len(listing_containers)} potential listing containers using selector '{listing_item_selector}'.") - - processed_count = 0 - # first_container_processed_for_html_dump = False # REMOVE HTML DUMP FLAG - for i, container in enumerate(listing_containers): - url_path = None - title_text = "N/A" # Default to N/A - price_text = "N/A" # Default to N/A - mileage_text = "N/A" # Default to N/A (as it's not reliably on card) - listing_url = None - - try: - logging.debug(f"Processing container {i+1}/{len(listing_containers)}") - - # Attempt to get Title - title_el = await container.query_selector("h2[data-cmp='subheading']") # Updated selector from HTML dump - if title_el: - raw_title_text = await title_el.inner_text() - title_text = raw_title_text.strip() if raw_title_text else "N/A" - - # Attempt to get URL from parent of title_el - # Playwright's query_selector does not directly support xpath like "ancestor::a". - # A common structure is
<a href="..."><h2>...</h2></a> or <div><a data-cmp="link"><h2>...</h2></a></div>.
- # We can try to find 'a' that contains this h2, or assume the 'a[data-cmp="link"]' is the one. - - # Let's use the a[data-cmp="link"] which was identified as containing the title h2 - parent_link_el = await container.query_selector("a[data-cmp='link']") - if parent_link_el: - url_path = await parent_link_el.get_attribute("href") - else: # Fallback if the above structure isn't found - logging.warning(f"Could not find parent a[data-cmp='link'] for title in listing {i+1}") - else: - logging.warning(f"Title not found with h2[data-cmp='subheading'] for listing {i+1}.") - - # Fallback or alternative for URL if not found via title's parent link - if not url_path: - url_el_alt = await container.query_selector("a[data-cmp='relLnk']") # Keep this fallback - if url_el_alt: - url_path = await url_el_alt.get_attribute("href") - - if not url_path: # Last resort for URL - first_a = await container.query_selector("a[href]") # Broadest fallback - if first_a: - url_path = await first_a.get_attribute("href") - - if not url_path: - logging.warning(f"Could not extract URL for listing {i+1} (Title: {title_text}). Skipping.") - continue - - if not url_path.startswith(('http://', 'https://')): - listing_url = f"https://www.autotrader.com{url_path}" - else: - listing_url = url_path - - # Attempt to get Price - price_el = await container.query_selector("div[data-cmp='firstPrice']") # Updated selector - if price_el: - raw_price_text = await price_el.inner_text() - price_text = raw_price_text.replace('$', '').replace(',', '').strip() if raw_price_text else "N/A" - else: - # Fallback for price (e.g. .first-price class directly) - price_el_fallback = await container.query_selector(".first-price") - if price_el_fallback: - raw_price_text = await price_el_fallback.inner_text() - price_text = raw_price_text.replace('$', '').replace(',', '').strip() if raw_price_text else "N/A" - else: - logging.warning(f"Price not found for listing {listing_url}") - price_text = "N/A" - - - # Mileage - Set to N/A as it's not reliably on the card from previous findings - mileage_text = "N/A" - # logging.info(f"Mileage not scraped from listing card for {listing_url} (by design for now).") - - vin_text = None - - listing_data = { - "listing_url": listing_url, - "title": title_text, # Already defaults to N/A or has value - "price": price_text, # Already defaults to N/A or has value - "mileage": mileage_text, # Is N/A - "vin": vin_text, - "source_name": self.source_name, - "data_points": { - "page_title_at_scrape": title # page's title, not listing's - } - } - listings_data.append(listing_data) - processed_count += 1 - logging.info(f"Successfully processed listing: {title_text[:50]}... URL: {listing_url}") - - except Exception as e: - logging.error(f"Error processing listing container {i+1} for URL {listing_url if listing_url else 'Unknown'}: {e}", exc_info=True) - continue - - logging.info(f"Successfully processed {processed_count} out of {len(listing_containers)} listing containers.") - - except Exception as e: - logging.error(f"An error occurred during Playwright scraping phase: {e}", exc_info=True) - finally: - if browser: - logging.info("Closing browser.") - await browser.close() - - return listings_data - - -async def scrape_autotrader_data(autotrader_url: str, headless: bool = True, timeout: int = 120000) -> list[dict]: - """ - High-level function to scrape data from AutoTrader. - Initializes the scraper and calls its scraping method. - - Args: - autotrader_url (str): The URL to scrape. 
- headless (bool): Whether to run the browser in headless mode. - timeout (int): Timeout for scraping operations in milliseconds. - - Returns: - list[dict]: A list of scraped listing data. - """ - scraper = AutoTraderScraper() - listings = await scraper.get_private_listings(autotrader_url=autotrader_url, headless=headless, timeout=timeout) - return listings - - -async def scrape_autotrader_and_update_db(db: Session, autotrader_url: str, headless: bool, scrape_timeout: int): - """ - Scrapes listings from AutoTrader and updates the database. - - Args: - db (Session): The SQLAlchemy database session. - autotrader_url (str): The URL to scrape. - headless (bool): Whether to run the browser in headless mode. - scrape_timeout (int): Timeout for scraping operations in milliseconds. - - Returns: - dict: A status dictionary with counts of added, updated, and scraped listings. - """ - logging.info(f"Starting scrape and update for URL: {autotrader_url}") - - try: - listings_data = await scrape_autotrader_data( - autotrader_url=autotrader_url, - headless=headless, - timeout=scrape_timeout - ) - except Exception as e: - logging.error(f"Failed to scrape data from {autotrader_url}: {e}", exc_info=True) - return {"status": "error", "message": f"Scraping failed: {e}"} - - added_count = 0 - updated_count = 0 - scraped_count = len(listings_data) - - for listing_data in listings_data: - source_url = listing_data.get('listing_url') # Renamed from 'url' to 'listing_url' in dummy data - if not source_url: - logging.warning(f"Scraped item missing 'listing_url': {listing_data.get('title')}. Skipping.") - continue - - try: - existing_listing = db.query(CarListing).filter(CarListing.source_url == source_url).first() - - if existing_listing: - # Placeholder for update logic - # existing_listing.extracted_at = datetime.utcnow() - # existing_listing.data_points = {k: v for k, v in listing_data.items() if k != 'listing_url'} - # # Update other fields like price if necessary - # db.add(existing_listing) # Not strictly necessary if only mutable fields changed and session tracks - updated_count += 1 - logging.info(f"Listing at {source_url} already exists. 
Marked for update (placeholder).") - else: - new_listing = CarListing( - platform="autotrader", - extracted_at=datetime.utcnow(), - source_url=source_url, - # Ensure data_points stores everything else from listing_data - data_points={k: v for k, v in listing_data.items() if k != 'listing_url'} - ) - db.add(new_listing) - added_count += 1 - logging.info(f"New listing added from {source_url}") - except Exception as e: - logging.error(f"Error processing listing {source_url} for DB: {e}", exc_info=True) - # Decide if you want to rollback here or continue with other listings - - try: - db.commit() - logging.info("Database changes committed.") - except Exception as e: - logging.error(f"Database commit failed: {e}", exc_info=True) - db.rollback() - return {"status": "error", "message": f"DB commit failed: {e}", "added": 0, "updated": 0, "scraped_count": scraped_count} - - status_summary = { - "status": "success", - "added": added_count, - "updated": updated_count, - "scraped_count": scraped_count - } - logging.info(f"DB update summary: {status_summary}") - return status_summary - -async def main(): - # Use settings from config.py - # url = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller") - # headless_str = os.getenv("HEADLESS_BROWSER", "True") - # headless = headless_str.lower() == "true" - # scrape_timeout_str = os.getenv("SCRAPE_TIMEOUT", "120000") - # try: - # scrape_timeout = int(scrape_timeout_str) - # except ValueError: - # logging.warning(f"Invalid SCRAPE_TIMEOUT value: {scrape_timeout_str}. Defaulting to 120000ms.") - # scrape_timeout = 120000 - - # from database import SessionLocal # Already imported at the top - db: Session = SessionLocal() # SessionLocal now uses DATABASE_URL from config.py via database.py - try: - logging.info(f"Starting scraper and DB update for URL: {AUTOTRADER_URL}, Headless: {HEADLESS_BROWSER}, Timeout: {SCRAPE_TIMEOUT}ms") - stats = await scrape_autotrader_and_update_db( - db=db, - autotrader_url=AUTOTRADER_URL, - headless=HEADLESS_BROWSER, - scrape_timeout=SCRAPE_TIMEOUT - ) - logging.info(f"Scraping and DB update completed: {stats}") - except Exception as e: - logging.error(f"Error during scraping and DB update in main: {e}", exc_info=True) - finally: - logging.info("Closing DB session in main.") - db.close() - -if __name__ == "__main__": - # To run this: - # 1. Ensure Playwright browsers are installed: `playwright install chromium` - # 2. Set environment variables if needed (AUTOTRADER_URL, HEADLESS_BROWSER, SCRAPE_TIMEOUT) - # 3. Uncomment the line below - asyncio.run(main()) - # pass # Keep it passive for now, to be run manually when needed diff --git a/config.py b/config.py deleted file mode 100644 index 44148ca..0000000 --- a/config.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from dotenv import load_dotenv - -# Load environment variables from .env file if it exists -# This is useful for local development. 
-load_dotenv()
-
-# Database Configuration
-DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite:///./data/vehicle_tracker.db")
-
-# Scraper Configuration
-AUTOTRADER_URL: str = os.getenv("AUTOTRADER_URL", "https://www.autotrader.com/cars-for-sale/private-seller")
-SCRAPE_TIMEOUT: int = int(os.getenv("SCRAPE_TIMEOUT", "120000")) # Milliseconds
-HEADLESS_BROWSER: bool = os.getenv("HEADLESS_BROWSER", "True").lower() == "true"
-
-# API Configuration (if any specific ones are needed later)
-# Example: API_HOST: str = os.getenv("API_HOST", "0.0.0.0")
-# Example: API_PORT: int = int(os.getenv("API_PORT", "8000"))
-
-# Logging Configuration (can also be added here if more complex)
-LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").upper()
-
-# Ensure critical URLs have a scheme for robustness
-if not AUTOTRADER_URL.startswith(("http://", "https://")):
-    # This print statement is for immediate feedback during startup/import.
-    # In a pure library, side effects on import are sometimes discouraged,
-    # but for an application's main config, it's often acceptable.
-    print(f"Warning: AUTOTRADER_URL ('{AUTOTRADER_URL}') did not have a scheme, prepended https://.")
-    AUTOTRADER_URL = "https://" + AUTOTRADER_URL
-    print(f"Corrected AUTOTRADER_URL: {AUTOTRADER_URL}")
-
-
-# Example of how to handle SQLite connect_args based on config
-DB_CONNECT_ARGS: dict = {"check_same_thread": False} if DATABASE_URL.startswith("sqlite") else {}
diff --git a/database.py b/database.py
deleted file mode 100644
index 58a11dc..0000000
--- a/database.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from sqlalchemy import Column, Integer, String, DateTime, JSON, create_engine
-from sqlalchemy.orm import declarative_base, sessionmaker, Session
-from config import DATABASE_URL, DB_CONNECT_ARGS # Import from config
-
-# Use imported configuration
-engine = create_engine(DATABASE_URL, connect_args=DB_CONNECT_ARGS)
-
-SessionLocal = sessionmaker(bind=engine, autoflush=False)
-Base = declarative_base()
-
-class CarListing(Base):
-    __tablename__ = "listings"
-    id = Column(Integer, primary_key=True, index=True)
-    platform = Column(String)
-    extracted_at = Column(DateTime)
-    source_url = Column(String, unique=True)
-    data_points = Column(JSON)
-
-Base.metadata.create_all(bind=engine)
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..fb37fc5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,93 @@
+import asyncio
+import uvicorn
+from contextlib import asynccontextmanager
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import logging
+from datetime import datetime
+
+from src.database import create_db_tables
+from src.api.routes import router as api_v1_router
+from src.config import settings
+from src.automation.browser_sim import run_autotrader_scraper_example_standalone
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler()]
+)
+logger = logging.getLogger(__name__)
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    logger.info("πŸš€ Starting Educational Vehicle Tracker API...")
+    await create_db_tables()
+    logger.info("πŸ“Š Database tables checked/created.")
+    yield
+    logger.info("πŸ”› Shutting down Educational Vehicle Tracker API.")
+
+app = FastAPI(
+    title="Educational Vehicle Tracker",
+    description="An educational system for learning web automation and data pipeline architecture, now with real scraping.",
+    version="1.1.0",
+    lifespan=lifespan
+)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+app.include_router(api_v1_router, prefix="/api/v1")
+
+@app.get("/", include_in_schema=False)
+async def root_redirect_to_docs():
+    from fastapi.responses import RedirectResponse
+    return RedirectResponse(url="/docs")
+
+@app.get("/health", summary="Health Check")
+async def health_check():
+    return {"status": "healthy", "service": "vehicle-tracker-api", "timestamp": datetime.utcnow()}
+
+async def run_standalone_scrape_cli():
+    logger.info("🌽 Running Standalone AutoTrader Scraper Example from CLI")
+    print("=" * 40)
+    try:
+        await run_autotrader_scraper_example_standalone()
+        logger.info("βœ… Standalone scraper example completed successfully!")
+    except Exception as e:
+        logger.error(f"❌ Standalone scraper example failed: {e}", exc_info=True)
+
+if __name__ == "__main__":
+    import sys
+    print_startup_message = True
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "scrape_test":
+            print_startup_message = False
+            asyncio.run(run_standalone_scrape_cli())
+        elif sys.argv[1] == "create_tables":
+            print_startup_message = False
+            asyncio.run(create_db_tables())
+            logger.info("Database tables creation process finished.")
+        else:
+            logger.warning(f"Unknown command: {sys.argv[1]}")
+            print("πŸ” Usage: python main.py [scrape_test | create_tables]")
+    if print_startup_message:
+        logger.info("πŸŽ“ Educational Vehicle Tracking System - API Server Mode")
+        print("=" * 50)
+        logger.info(f"API Host: {settings.API_HOST}")
+        logger.info(f"API Port: {settings.API_PORT}")
+        logger.info(f"Database: {settings.DATABASE_URL}")
+        logger.info(f"Max Listings per Session (Scrape): {settings.MAX_LISTINGS_PER_SESSION}")
+        logger.info(f"Playwright Headless: {settings.HEADLESS}")
+        print("=" * 50)
+        uvicorn.run(
+            "main:app",
+            host=settings.API_HOST,
+            port=settings.API_PORT,
+            reload=True,
+            log_level="info"
+        )
diff --git a/package.json b/package.json
deleted file mode 100644
index 5f62ed5..0000000
--- a/package.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "name": "autotrader-scraper",
-  "version": "1.0.0",
-  "description": "A FastAPI application for scraping Autotrader data.",
-  "main": "index.js",
-  "scripts": {
-    "start": "python app.py",
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "keywords": ["fastapi", "autotrader", "scraper", "web-scraping"],
-  "author": "",
-  "license": "ISC"
-}
diff --git a/render.yaml b/render.yaml
index c0e7ffa..f755e35 100644
--- a/render.yaml
+++ b/render.yaml
@@ -5,15 +5,25 @@ services:
     buildCommand: |
       pip install -r requirements.txt
       playwright install chromium
-    startCommand: uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}
+    startCommand: uvicorn main:app --host 0.0.0.0 --port ${PORT:-8000}
     envVars:
       - key: PYTHON_VERSION
-        value: 3.11 # Or your desired Python version
+        value: 3.11
       - key: DATABASE_URL
-        generateValue: true # For Render PostgreSQL, or set manually for SQLite/external DB
+        generateValue: true
-      - key: AUTOTRADER_URL
-        value: "https://www.autotrader.com/cars-for-sale/private-seller" # Example
-      - key: SCRAPE_TIMEOUT
-        value: 120000 # Example: 120 seconds
-      - key: HEADLESS_BROWSER
+      - key: HEADLESS
         value: "True"
+      - key: BROWSER_TIMEOUT
+        value: "60000"
+      - key: PAGE_DELAY
+        value: "5000"
+      - key: MIN_DELAY_BETWEEN_ACTIONS
+        value: "2.5"
+      - key: MAX_LISTINGS_PER_SESSION
+        value: "25"
+      - key: PROXY_SERVER
+        value: ""
+      - key: 
PROXY_USERNAME + value: "" + - key: PROXY_PASSWORD + value: "" diff --git a/requirements.txt b/requirements.txt index 2e8a221..b450ec4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ -aiofiles -asyncpg -fastapi -playwright -playwright-stealth -python-dotenv -sqlalchemy[asyncio] -uvicorn[standard] +fastapi==0.104.1 +uvicorn==0.24.0 +sqlalchemy==2.0.23 +aiosqlite==0.19.0 +playwright==1.40.0 +python-dotenv==1.0.0 +pydantic==2.5.0 diff --git a/app/__init__.py b/src/__init__.py similarity index 100% rename from app/__init__.py rename to src/__init__.py diff --git a/src/api/__init__.py b/src/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/api/routes.py b/src/api/routes.py new file mode 100644 index 0000000..612fb3f --- /dev/null +++ b/src/api/routes.py @@ -0,0 +1,237 @@ +from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, and_, exists, update +from datetime import datetime +import json +from typing import List +import logging + +from src.database import get_db, AsyncSessionLocal +from src.models.vehicle import ( + VehicleListing, + VehicleListingCreate, + VehicleListingResponse, + SearchFilters +) +from src.automation.browser_sim import AutoTraderScraper +from src.config import settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +async def scrape_and_store_task(search_url: str, max_listings: int, source_site_name: str = "autotrader"): + logger.info(f"Background task started: Scraping {source_site_name} URL: {search_url} for max {max_listings} listings.") + created_count = 0 + updated_count = 0 + failed_count = 0 + processed_urls = set() + if source_site_name.lower() == "autotrader": + ScraperClass = AutoTraderScraper + else: + logger.error(f"Unsupported source site: {source_site_name}") + return + async with ScraperClass() as scraper: + scraped_listings_pydantic = await scraper.scrape_listings( + search_url=search_url, + max_listings_to_fetch=max_listings + ) + if not scraped_listings_pydantic: + logger.info(f"No listings returned from {source_site_name} scraper for URL: {search_url}") + return + logger.info(f"{source_site_name} scraper returned {len(scraped_listings_pydantic)} listings. 
Processing for DB storage...") + async with AsyncSessionLocal() as db_session: + for listing_data in scraped_listings_pydantic: + if not listing_data.listing_url: + logger.warning("Scraped data missing listing_url, skipping.") + failed_count += 1 + continue + if listing_data.listing_url in processed_urls: + logger.debug(f"URL {listing_data.listing_url} already processed in this run, skipping duplicate.") + continue + processed_urls.add(listing_data.listing_url) + try: + stmt = select(VehicleListing).where(VehicleListing.listing_url == listing_data.listing_url) + result = await db_session.execute(stmt) + existing_vehicle = result.scalar_one_or_none() + features_json = json.dumps(listing_data.features) if listing_data.features else None + if existing_vehicle: + logger.debug(f"Updating existing listing: {listing_data.listing_url} (ID: {existing_vehicle.id})") + update_values = {} + if listing_data.title and existing_vehicle.title != listing_data.title: + update_values['title'] = listing_data.title + if listing_data.price is not None and existing_vehicle.price != listing_data.price: + update_values['price'] = listing_data.price + if listing_data.mileage is not None and existing_vehicle.mileage != listing_data.mileage: + update_values['mileage'] = listing_data.mileage + if features_json and existing_vehicle.features != features_json: + update_values['features'] = features_json + if listing_data.photo_url and existing_vehicle.photo_url != listing_data.photo_url: + update_values['photo_url'] = listing_data.photo_url + if listing_data.location and existing_vehicle.location != listing_data.location: + update_values['location'] = listing_data.location + if listing_data.year and existing_vehicle.year != listing_data.year: + update_values['year'] = listing_data.year + if listing_data.make and existing_vehicle.make != listing_data.make: + update_values['make'] = listing_data.make + if listing_data.model and existing_vehicle.model != listing_data.model: + update_values['model'] = listing_data.model + if listing_data.trim and existing_vehicle.trim != listing_data.trim: + update_values['trim'] = listing_data.trim + update_values['is_active'] = True + update_values['last_scraped_at'] = datetime.utcnow() + if update_values: + stmt_update = update(VehicleListing).where(VehicleListing.id == existing_vehicle.id).values(**update_values) + await db_session.execute(stmt_update) + updated_count += 1 + else: + logger.debug(f"Adding new listing: {listing_data.listing_url}") + db_vehicle = VehicleListing( + listing_id_external=listing_data.listing_id_external, + title=listing_data.title, + year=listing_data.year, + make=listing_data.make, + model=listing_data.model, + trim=listing_data.trim, + price=listing_data.price, + mileage=listing_data.mileage, + listing_url=listing_data.listing_url, + photo_url=listing_data.photo_url, + features=features_json, + location=listing_data.location, + seller_type=listing_data.seller_type, + source_site=listing_data.source_site, + is_active=True, + last_scraped_at=datetime.utcnow() + ) + db_session.add(db_vehicle) + created_count += 1 + await db_session.commit() + except Exception as e: + failed_count += 1 + logger.error(f"Failed to process/store listing {listing_data.listing_url}: {e}", exc_info=True) + await db_session.rollback() + logger.info(f"Background task for {source_site_name} finished. 
Created={created_count}, Updated={updated_count}, Failed={failed_count}") + +@router.get("/", response_model=dict, include_in_schema=False) +async def api_v1_root_info(): + return { + "message": "Vehicle Tracking API - V1", + "active_endpoints": ["/vehicles", "/vehicles/search", "/vehicles/scrape", "/vehicles/{id}", "/vehicles/stats/summary"] + } + +@router.get("/vehicles/", response_model=List[VehicleListingResponse]) +async def get_all_vehicles( + skip: int = Query(0, ge=0), + limit: int = Query(settings.MAX_LISTINGS_PER_SESSION, ge=1, le=200), + db: AsyncSession = Depends(get_db), + filters: SearchFilters = Depends(), +): + query = select(VehicleListing) + conditions = [] + if filters.is_active is not None: + conditions.append(VehicleListing.is_active == filters.is_active) + if filters.make: + conditions.append(VehicleListing.make.ilike(f"%{filters.make}%")) + if filters.model: + conditions.append(VehicleListing.model.ilike(f"%{filters.model}%")) + if filters.min_year: + conditions.append(VehicleListing.year >= filters.min_year) + if filters.max_year: + conditions.append(VehicleListing.year <= filters.max_year) + if filters.min_price: + conditions.append(VehicleListing.price >= filters.min_price) + if filters.max_price: + conditions.append(VehicleListing.price <= filters.max_price) + if filters.max_mileage: + conditions.append(VehicleListing.mileage <= filters.max_mileage) + if filters.location: + conditions.append(VehicleListing.location.ilike(f"%{filters.location}%")) + if filters.seller_type: + conditions.append(VehicleListing.seller_type.ilike(f"%{filters.seller_type}%")) + if filters.source_site: + conditions.append(VehicleListing.source_site.ilike(f"%{filters.source_site}%")) + if conditions: + query = query.where(and_(*conditions)) + query = query.order_by(VehicleListing.last_scraped_at.desc(), VehicleListing.created_at.desc()) + result = await db.execute(query.offset(skip).limit(limit)) + vehicles = result.scalars().all() + response_vehicles = [] + for vehicle_db_item in vehicles: + response_vehicles.append(VehicleListingResponse.model_validate(vehicle_db_item)) + return response_vehicles + +@router.get("/vehicles/{vehicle_id}", response_model=VehicleListingResponse) +async def get_vehicle_by_id_route(vehicle_id: int, db: AsyncSession = Depends(get_db)): + query = select(VehicleListing).where(VehicleListing.id == vehicle_id) + result = await db.execute(query) + vehicle_db_item = result.scalar_one_or_none() + if not vehicle_db_item: + raise HTTPException(status_code=404, detail="Vehicle not found") + return VehicleListingResponse.model_validate(vehicle_db_item) + +@router.post("/vehicles/", response_model=VehicleListingResponse, status_code=201) +async def create_vehicle_listing_manual( + vehicle_create_data: VehicleListingCreate, + db: AsyncSession = Depends(get_db) +): + stmt_exists = select(exists().where(VehicleListing.listing_url == vehicle_create_data.listing_url)) + url_exists = await db.scalar(stmt_exists) + if url_exists: + raise HTTPException(status_code=409, detail=f"Vehicle with URL {vehicle_create_data.listing_url} already exists.") + features_json_str = json.dumps(vehicle_create_data.features) if vehicle_create_data.features else None + db_vehicle_item = VehicleListing( + **vehicle_create_data.model_dump(exclude={'features'}), + features=features_json_str, + is_active=True, + last_scraped_at=datetime.utcnow() + ) + db.add(db_vehicle_item) + await db.commit() + await db.refresh(db_vehicle_item) + return VehicleListingResponse.model_validate(db_vehicle_item) + 
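+# Illustrative client call for the endpoint below (assumes the default host and
+# port from src/config.py; the search URL is an example, not a tested value):
+#
+#   curl -X POST "http://127.0.0.1:8000/api/v1/vehicles/scrape?site_name=autotrader&max_listings=10&search_url=https%3A%2F%2Fwww.autotrader.com%2Fcars-for-sale%2Fby-owner%2Fall-states"
+#
+# The endpoint returns 202 immediately and runs scrape_and_store_task via
+# FastAPI's BackgroundTasks.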
+@router.post("/vehicles/scrape", status_code=202) +async def trigger_site_scrape( + background_tasks: BackgroundTasks, + site_name: str = Query("autotrader", description="Name of the site to scrape (e.g., 'autotrader')."), + search_url: str = Query(..., description="Full search URL for the specified site."), + max_listings: int = Query(settings.MAX_LISTINGS_PER_SESSION, description="Maximum listings to fetch from this scrape.", ge=1, le=100) +): + logger.info(f"Received request to scrape {site_name} URL: {search_url} for max {max_listings} listings.") + if site_name.lower() not in ["autotrader"]: + raise HTTPException(status_code=400, detail=f"Scraping for site '{site_name}' is not supported.") + background_tasks.add_task(scrape_and_store_task, search_url, max_listings, site_name) + return {"message": f"{site_name.capitalize()} scraping task accepted and started in the background for URL: {search_url}"} + +@router.delete("/vehicles/{vehicle_id}", status_code=200) +async def delete_vehicle_listing(vehicle_id: int, db: AsyncSession = Depends(get_db)): + query = select(VehicleListing).where(VehicleListing.id == vehicle_id) + result = await db.execute(query) + vehicle_db_item = result.scalar_one_or_none() + if not vehicle_db_item: + raise HTTPException(status_code=404, detail="Vehicle not found") + await db.delete(vehicle_db_item) + await db.commit() + return {"message": "Vehicle deleted successfully"} + +@router.get("/vehicles/stats/summary", response_model=dict) +async def get_vehicle_listing_stats(db: AsyncSession = Depends(get_db)): + from sqlalchemy import func as sql_func + make_query = select(VehicleListing.make, sql_func.count(VehicleListing.id).label('count'))\ + .where(VehicleListing.make.isnot(None)).group_by(VehicleListing.make).order_by(sql_func.count(VehicleListing.id).desc()) + make_result = await db.execute(make_query) + make_stats = [{"make": row[0], "count": row[1]} for row in make_result.all()] + year_query = select(VehicleListing.year, sql_func.avg(VehicleListing.price).label('avg_price'), sql_func.count(VehicleListing.id).label('count'))\ + .where(VehicleListing.year.isnot(None)).group_by(VehicleListing.year).order_by(VehicleListing.year.desc()) + year_result = await db.execute(year_query) + year_stats = [{"year": row[0], "avg_price": round(row[1], 2) if row[1] else 0.0, "count": row[2]} for row in year_result.all()] + total_query = select(sql_func.count(VehicleListing.id)) + total_count = await db.scalar(total_query) or 0 + active_query = select(sql_func.count(VehicleListing.id)).where(VehicleListing.is_active == True) + active_count = await db.scalar(active_query) or 0 + return { + "total_listings_in_db": total_count, + "active_listings": active_count, + "by_make": make_stats, + "by_year_with_avg_price": year_stats + } diff --git a/src/automation/__init__.py b/src/automation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/automation/browser_sim.py b/src/automation/browser_sim.py new file mode 100644 index 0000000..c93f204 --- /dev/null +++ b/src/automation/browser_sim.py @@ -0,0 +1,420 @@ +import asyncio +import json +import re +import random +from typing import List, Dict, Optional +from playwright.async_api import async_playwright, Page, Browser, PlaywrightException, Locator +from urllib.parse import urljoin, urlparse, parse_qs +from datetime import datetime +import logging +import hashlib + +from src.config import settings +from src.models.vehicle import VehicleListingCreate + +logger = logging.getLogger(__name__) + +class AutoTraderScraper: 
+ def __init__(self): + self.browser: Optional[Browser] = None + self.playwright_instance: Optional[async_playwright] = None + self.base_action_delay = settings.MIN_DELAY_BETWEEN_ACTIONS + self.page_load_delay = settings.PAGE_DELAY / 1000 + + async def __aenter__(self): + logger.info("Initializing AutoTrader Scraper...") + self.playwright_instance = await async_playwright().start() + try: + proxy_cfg = None + if settings.PROXY_SERVER: + proxy_cfg = {"server": settings.PROXY_SERVER} + if settings.PROXY_USERNAME and settings.PROXY_PASSWORD: + proxy_cfg["username"] = settings.PROXY_USERNAME + proxy_cfg["password"] = settings.PROXY_PASSWORD + + self.browser = await self.playwright_instance.chromium.launch( + headless=settings.HEADLESS, + proxy=proxy_cfg, + args=[ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-infobars', + '--window-position=0,0', + '--ignore-certificate-errors', + '--ignore-certificate-errors-spki-list', + '--disable-blink-features=AutomationControlled', + '--disable-dev-shm-usage' + ], + timeout=settings.BROWSER_TIMEOUT + ) + logger.info(f"Browser launched (Headless: {settings.HEADLESS})") + except PlaywrightException as e: + logger.error(f"Failed to launch browser: {e}") + if self.playwright_instance: + await self.playwright_instance.stop() + raise + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + logger.info("Closing AutoTrader Scraper resources...") + if self.browser and self.browser.is_connected(): + try: + await self.browser.close() + logger.info("Browser closed.") + except PlaywrightException as e: + logger.error(f"Error closing browser: {e}") + if self.playwright_instance: + try: + await self.playwright_instance.stop() + logger.info("Playwright instance stopped.") + except Exception as e: + logger.error(f"Error stopping Playwright: {e}") + if exc_type: + logger.error(f"Exception occurred during scraping: {exc_val}", exc_info=(exc_type, exc_val, exc_tb)) + + async def _apply_stealth_measures(self, page: Page): + logger.info("Applying stealth measures to page...") + user_agents = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" + ] + await page.set_extra_http_headers({"User-Agent": random.choice(user_agents)}) + await page.add_init_script(""" + Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); + Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en', 'en-GB'] }); + const pluginCount = Math.floor(Math.random() * 3) + 1; + Object.defineProperty(navigator, 'plugins', { + get: () => Array(pluginCount).fill(null).map((_, i) => ({ name: `Plugin ${i}`, filename: `plugin${i}.dll`, description: `Mock plugin ${i}` })) + }); + const mimeTypeCount = Math.floor(Math.random() * 3) + 1; + Object.defineProperty(navigator, 'mimeTypes', { + get: () => Array(mimeTypeCount).fill(null).map((_, i) => ({ type: `application/x-mimetype${i}`, suffixes: `m${i}`, description: `Mock mimetype ${i}` })) + }); + const getParameter = WebGLRenderingContext.prototype.getParameter; + WebGLRenderingContext.prototype.getParameter = function(parameter) { + if (parameter === 37445) return 'Intel Open Source Technology Center'; + if (parameter === 37446) 
return 'Mesa DRI Intel(R) Iris Xe Graphics (TGL GT2)'; + return getParameter.apply(this, arguments); + }; + const originalQuery = window.navigator.permissions.query; + window.navigator.permissions.query = (parameters) => ( + parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters) + ); + try { Date.prototype.getTimezoneOffset = function() { return -Math.floor(Math.random() * 8 + 3) * 60; }; } catch (e) {} + """) + viewports = [{"width": 1920, "height": 1080}, {"width": 1366, "height": 768}, {"width": 1440, "height": 900}, {"width": 2560, "height": 1440}] + await page.set_viewport_size(random.choice(viewports)) + logger.info("Stealth measures applied.") + + async def _human_like_delay(self, min_delay: Optional[float] = None, max_delay: Optional[float] = None): + min_d = min_delay if min_delay is not None else self.base_action_delay + max_d = max_delay if max_delay is not None else self.base_action_delay + 2.0 + delay = random.uniform(min_d, max_d) + logger.debug(f"Waiting {delay:.2f} seconds...") + await asyncio.sleep(delay) + + async def _human_like_scroll(self, page: Page, scroll_attempts=7): + logger.info(f"Performing human-like scrolling: {scroll_attempts} attempts...") + previous_scroll_height = -1.0 + for i in range(scroll_attempts): + current_scroll_height = float(await page.evaluate("document.body.scrollHeight")) + if abs(current_scroll_height - previous_scroll_height) < 1.0 and i > 0: + logger.info(f"Scroll attempt {i+1}: Reached end of scrollable content or no new content loaded.") + break + scroll_amount = await page.evaluate(f"Math.random() * window.innerHeight * 0.7 + window.innerHeight * 0.3") + await page.evaluate(f"window.scrollBy(0, {scroll_amount})") + await self._human_like_delay(min_delay=0.8, max_delay=2.2) + previous_scroll_height = current_scroll_height + logger.info("Scrolling to bottom one last time...") + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await self._human_like_delay(min_delay=2.0, max_delay=3.5) + logger.info("Scrolling finished.") + + def _extract_listing_id_from_url(self, url: str) -> Optional[str]: + if not url: + return None + try: + parsed_url = urlparse(url) + query_params = parse_qs(parsed_url.query) + if 'listingId' in query_params: + return query_params['listingId'][0] + path_parts = [part for part in parsed_url.path.split('/') if part] + if 'vehicle' in path_parts: + vehicle_idx = path_parts.index('vehicle') + if vehicle_idx + 1 < len(path_parts): + return path_parts[vehicle_idx+1] + for part in reversed(path_parts): + if part.isdigit() and len(part) > 5: + return part + except Exception as e: + logger.warning(f"Could not parse structured listing ID from URL {url}: {e}") + logger.debug(f"No structured ID found, hashing URL for ID: {url}") + return hashlib.md5(url.encode()).hexdigest()[:16] + + def _parse_title_details(self, title_str: str) -> Dict: + details = {'year': None, 'make': None, 'model': None, 'trim': None} + if not title_str: + return details + original_title = title_str + year_match = re.search(r'\b(19[89]\d|20[0-2]\d|2030)\b', title_str) + if year_match: + details['year'] = int(year_match.group(1)) + title_str = title_str.replace(year_match.group(1), "", 1).strip() + title_str = re.sub(r'^(Used|New|Certified Pre-Owned|CPO)\s+', '', title_str, flags=re.IGNORECASE).strip() + parts = title_str.split(maxsplit=3) + if len(parts) > 0: + details['make'] = parts[0] + if len(parts) > 1: + details['model'] = parts[1] + if len(parts) > 2: + 
details['trim'] = " ".join(parts[2:])
+        logger.debug(f"Parsed title details: {details} from original title: '{original_title}'")
+        return details
+
+    async def _extract_listing_data(self, listing_element: Locator, page_url: str) -> Optional[VehicleListingCreate]:
+        data_dict: Dict[str, Any] = {}
+        listing_html_for_debug = "N/A (HTML not captured)"
+        try:
+            link_el_selectors = [
+                'a[data-cmp="inventoryListingCardLink"]',
+                'a[data-testid="srp-list-item-link"]',
+                'a[href*="vehicledetails.xhtml?listingId="]',
+                'h2 > a',
+                'h3 > a'
+            ]
+            raw_href = None
+            for selector in link_el_selectors:
+                link_el = listing_element.locator(selector).first
+                if await link_el.count():
+                    raw_href = await link_el.get_attribute("href", timeout=1500)
+                    if raw_href:
+                        break
+            if not raw_href:
+                logger.warning("No primary link found for a listing card. Skipping.")
+                return None
+            data_dict['listing_url'] = urljoin(page_url, raw_href)
+
+            title_el_selectors = ["h2[data-cmp*='title']", "h3[data-cmp*='title']", "div[data-cmp='displayName'] h2", "h2", "h3"]
+            raw_title = "Title Not Found"
+            for selector in title_el_selectors:
+                title_el = listing_element.locator(selector).first
+                if await title_el.count():
+                    try:
+                        raw_title = await title_el.text_content(timeout=1500)
+                        if raw_title and raw_title.strip():
+                            break
+                    except PlaywrightException:
+                        continue
+            data_dict['title'] = (raw_title or "Title Not Found").strip()
+            title_details = self._parse_title_details(data_dict['title'])
+            data_dict.update(title_details)
+
+            price_selectors = [
+                "span[data-cmp='pricingSection'] .text-size-lg-3",
+                "span[data-cmp='pricingSection']",
+                ".pricing-section .first-price",
+                "span[class*='price']", "div[class*='price']"
+            ]
+            for selector in price_selectors:
+                price_el = listing_element.locator(selector).filter(has_text=re.compile(r"\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?")).first
+                if await price_el.count():
+                    try:
+                        price_text = await price_el.text_content(timeout=1000)
+                        cleaned_price = re.sub(r'[^\d.]', '', price_text)
+                        if cleaned_price and cleaned_price != '.':
+                            data_dict['price'] = float(cleaned_price)
+                            break
+                    except PlaywrightException:
+                        continue
+
+            mileage_selectors = [
+                "div[data-cmp='listUnstyled'] li:has-text('miles')",
+                "div.item-vehicle-mileage",
+                "div[class*='mileage']", "span[class*='mileage']"
+            ]
+            for selector in mileage_selectors:
+                mileage_el = listing_element.locator(selector).filter(has_text=re.compile(r"[\d,]+\s*mi(?:les)?", re.IGNORECASE)).first
+                if await mileage_el.count():
+                    try:
+                        mileage_text = await mileage_el.text_content(timeout=1000)
+                        match = re.search(r'([\d,]+)\s*mi', mileage_text, re.IGNORECASE)
+                        if match:
+                            data_dict['mileage'] = int(re.sub(r',', '', match.group(1)))
+                            break
+                    except PlaywrightException:
+                        continue
+
+            photo_selectors = [
+                'img[data-cmp="responsiveImage"]',
+                'img[data-testid="srp-list-item-image"]',
+                '.srp-img-container img',
+                'img[alt*="vehicle image"]'
+            ]
+            for selector in photo_selectors:
+                photo_el = listing_element.locator(selector).first
+                if await photo_el.count():
+                    try:
+                        src = await photo_el.get_attribute("src", timeout=1000)
+                        if src and not src.startswith('data:image'):
+                            data_dict['photo_url'] = urljoin(page_url, src)
+                            break
+                    except PlaywrightException:
+                        continue
+
+            features_list = []
+            feature_selectors = ["ul[class*='features'] li", "div[data-cmp='pill']", ".item-features span"]
+            for selector in feature_selectors:
+                feature_elements = await listing_element.locator(selector).all()
+                for fe_el in feature_elements[:5]:
+                    try:
+                        f_text = await fe_el.text_content(timeout=500)
+                        if 
f_text and len(f_text.strip()) > 2 and len(f_text.strip()) < 50: + features_list.append(f_text.strip()) + except PlaywrightException: + continue + if features_list: + break + data_dict['features'] = list(set(features_list)) + + location_selectors = ["div[data-cmp*='location']", "div.text-gray-dark.text-truncate", ".item-location"] + for selector in location_selectors: + location_el = listing_element.locator(selector).first + if await location_el.count(): + try: + loc_text = await location_el.text_content(timeout=1000) + data_dict['location'] = loc_text.replace('Located in', '').replace('Dealership Location', '').strip() + if data_dict['location']: + break + except PlaywrightException: + continue + + data_dict['listing_id_external'] = self._extract_listing_id_from_url(data_dict['listing_url']) + + return VehicleListingCreate(**data_dict) + + except PlaywrightException as e: + listing_html_for_debug = await listing_element.evaluate("element => element.outerHTML", timeout=1000) + logger.error(f"Playwright error extracting data from a listing card: {e}. HTML: {listing_html_for_debug[:500]}...") + except Exception as e: + listing_html_for_debug = await listing_element.evaluate("element => element.outerHTML", timeout=1000) + logger.error(f"General error extracting data from a listing card: {e}. HTML: {listing_html_for_debug[:500]}...") + return None + + async def scrape_listings(self, search_url: str, max_listings_to_fetch: int) -> List[VehicleListingCreate]: + if not self.browser or not self.browser.is_connected(): + logger.error("Browser not initialized or not connected. Call within async context manager.") + return [] + page: Optional[Page] = None + processed_listings: List[VehicleListingCreate] = [] + try: + context = await self.browser.new_context( + java_script_enabled=True, + accept_downloads=False, + locale='en-US' + ) + page = await context.new_page() + await self._apply_stealth_measures(page) + logger.info(f"Navigating to search URL: {search_url}") + await page.goto(search_url, wait_until="domcontentloaded", timeout=settings.BROWSER_TIMEOUT) + await self._human_like_delay(min_delay=self.page_load_delay, max_delay=self.page_load_delay + 3.0) + cookie_selectors = ['#onetrust-accept-btn-handler', 'button:has-text("Accept All Cookies")'] + for cs_selector in cookie_selectors: + try: + cookie_button = page.locator(cs_selector).first + if await cookie_button.is_visible(timeout=3000): + await cookie_button.click(timeout=5000, delay=random.uniform(0.3,0.8)*1000) + logger.info(f"Clicked cookie banner: {cs_selector}") + await self._human_like_delay(1.5, 2.5) + break + except PlaywrightException: + logger.debug(f"Cookie banner not found/visible or clickable with: {cs_selector}") + await self._human_like_scroll(page, scroll_attempts=settings.MAX_LISTINGS_PER_SESSION // 5 or 5) + listing_card_selectors = [ + "article[data-cmp='inventoryListing']", + "div[data-testid='srp-listing-item']", + "div[data-cmp='inventorySpotlightListingCard']", + ".inventory-listing", + "div[class*='srp-results'] div[class*='vehicle-card']" + ] + all_card_elements_locators = [] + for selector in listing_card_selectors: + elements_on_page = await page.locator(selector).count() + if elements_on_page > 0: + logger.info(f"Found {elements_on_page} cards with selector '{selector}'") + all_card_elements_locators.append(page.locator(selector)) + if selector in ["article[data-cmp='inventoryListing']", "div[data-testid='srp-listing-item']"]: + break + final_card_locator = None + if all_card_elements_locators: + final_card_locator = 
all_card_elements_locators[0] + if not final_card_locator: + logger.warning(f"No listing cards found on page: {search_url}.") + try: + await page.screenshot(path=f"debug_no_listings_{datetime.now():%Y%m%d%H%M%S}.png") + except Exception as e: + logger.error(f"Failed to save screenshot: {e}") + return [] + num_cards_on_page = await final_card_locator.count() + logger.info(f"Total listing cards to process with chosen locator: {num_cards_on_page}") + for i in range(num_cards_on_page): + if len(processed_listings) >= max_listings_to_fetch: + logger.info(f"Reached max listings to fetch: {max_listings_to_fetch}") + break + card_element = final_card_locator.nth(i) + logger.info(f"Processing card {i+1}/{num_cards_on_page}...") + try: + if not await card_element.is_visible(timeout=3000): + await card_element.scroll_into_view_if_needed(timeout=5000) + await self._human_like_delay(0.5, 1.0) + except PlaywrightException as e: + logger.warning(f"Card {i+1} not visible or could not scroll into view, skipping: {e}") + continue + listing_data = await self._extract_listing_data(card_element, page.url) + if listing_data: + processed_listings.append(listing_data) + logger.info(f"Successfully extracted: {listing_data.title[:60]}... ({listing_data.listing_id_external})") + else: + logger.warning(f"Failed to extract complete data from card {i+1}.") + await self._human_like_delay() + except PlaywrightException as e: + logger.error(f"A Playwright error occurred during scraping session for {search_url}: {e}", exc_info=True) + if page: + try: + await page.screenshot(path=f"error_pw_session_{datetime.now():%Y%m%d%H%M%S}.png") + except Exception as se: + logger.error(f"Failed to save error screenshot: {se}") + except Exception as e: + logger.error(f"An unexpected error occurred during scraping session for {search_url}: {e}", exc_info=True) + if page: + try: + await page.screenshot(path=f"error_unexpected_session_{datetime.now():%Y%m%d%H%M%S}.png") + except Exception as se: + logger.error(f"Failed to save error screenshot: {se}") + finally: + if page: + try: + await page.close() + except PlaywrightException as e: + logger.error(f"Error closing page: {e}") + if 'context' in locals() and context: + try: + await context.close() + except PlaywrightException as e: + logger.error(f"Error closing context: {e}") + logger.info(f"Scraping session for {search_url} finished. Extracted {len(processed_listings)} listings.") + return processed_listings[:max_listings_to_fetch] + +async def run_autotrader_scraper_example_standalone(): + example_search_url = "https://www.autotrader.com/cars-for-sale/by-owner/all-states?searchRadius=0&sortBy=datelistedDESC&numRecords=25" + max_to_get = settings.MAX_LISTINGS_PER_SESSION + async with AutoTraderScraper() as scraper: + results = await scraper.scrape_listings(example_search_url, max_listings_to_fetch=max_to_get) + if results: + logger.info(f"\n--- Scraped {len(results)} AutoTrader Listings (Standalone Example Run) ---") + for i, listing in enumerate(results): + logger.info(f"{i+1}. 
ID_Ext: {listing.listing_id_external} - {listing.title} ({listing.year} {listing.make} {listing.model}) - Price: ${listing.price if listing.price else 'N/A'}")
+    else:
+        logger.info("No listings were extracted in the standalone example run.")
+    return results
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..94a9b77
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,21 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class Settings:
+    DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///./default_vehicle_data.db")
+    HEADLESS: bool = os.getenv("HEADLESS", "true").lower() == "true"
+    BROWSER_TIMEOUT: int = int(os.getenv("BROWSER_TIMEOUT", "60000"))
+    PAGE_DELAY: int = int(os.getenv("PAGE_DELAY", "5000"))
+    MIN_DELAY_BETWEEN_ACTIONS: float = float(os.getenv("MIN_DELAY_BETWEEN_ACTIONS", "2.5"))
+    API_HOST: str = os.getenv("API_HOST", "127.0.0.1")
+    API_PORT: int = int(os.getenv("API_PORT", "8000"))
+    MAX_LISTINGS_PER_SESSION: int = int(os.getenv("MAX_LISTINGS_PER_SESSION", "25"))
+
+    # Proxy configuration
+    PROXY_SERVER: str | None = os.getenv("PROXY_SERVER")
+    PROXY_USERNAME: str | None = os.getenv("PROXY_USERNAME")
+    PROXY_PASSWORD: str | None = os.getenv("PROXY_PASSWORD")
+
+settings = Settings()
diff --git a/src/database.py b/src/database.py
new file mode 100644
index 0000000..e42f8f1
--- /dev/null
+++ b/src/database.py
@@ -0,0 +1,28 @@
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
+from src.config import settings
+
+DATABASE_URL = settings.DATABASE_URL
+
+engine = create_async_engine(
+    DATABASE_URL,
+    echo=False,
+    future=True
+)
+
+AsyncSessionLocal = async_sessionmaker(
+    bind=engine,
+    class_=AsyncSession,
+    expire_on_commit=False
+)
+
+async def create_db_tables():
+    from src.models.vehicle import Base
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
+
+async def get_db():
+    async with AsyncSessionLocal() as session:
+        try:
+            yield session
+        finally:
+            await session.close()
diff --git a/src/models/__init__.py b/src/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/models/vehicle.py b/src/models/vehicle.py
new file mode 100644
index 0000000..09bd717
--- /dev/null
+++ b/src/models/vehicle.py
@@ -0,0 +1,94 @@
+from sqlalchemy import Column, Integer, String, Float, DateTime, Text, Boolean
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.sql import func
+from pydantic import BaseModel, Field, field_validator
+from typing import Optional, List
+from datetime import datetime
+
+Base = declarative_base()
+
+class VehicleListing(Base):
+    __tablename__ = "vehicle_listings"
+
+    id = Column(Integer, primary_key=True, index=True)
+    listing_id_external = Column(String, index=True, unique=False, nullable=True)
+    title = Column(String, nullable=False)
+    year = Column(Integer, index=True, nullable=True)
+    make = Column(String, index=True, nullable=True)
+    model = Column(String, index=True, nullable=True)
+    trim = Column(String, nullable=True)
+    price = Column(Float, index=True, nullable=True)
+    mileage = Column(Integer, index=True, nullable=True)
+    listing_url = Column(Text, unique=True, nullable=False, index=True)
+    photo_url = Column(Text, nullable=True)
+    features = Column(Text, nullable=True)
+    location = Column(String, nullable=True)
+    seller_type = Column(String, default="private", nullable=True)
+    source_site = Column(String, default="autotrader", nullable=True)
+    created_at = Column(DateTime, default=func.now())
+    updated_at = Column(DateTime, default=func.now(), onupdate=func.now())
+    last_scraped_at = Column(DateTime, default=func.now(), onupdate=func.now())
+    is_active = Column(Boolean, default=True, index=True)
+
+class VehicleListingCreate(BaseModel):
+    listing_id_external: Optional[str] = None
+    title: str
+    year: Optional[int] = None
+    make: Optional[str] = None
+    model: Optional[str] = None
+    trim: Optional[str] = None
+    price: Optional[float] = None
+    mileage: Optional[int] = None
+    listing_url: str
+    photo_url: Optional[str] = None
+    features: Optional[List[str]] = Field(default_factory=list)
+    location: Optional[str] = None
+    seller_type: Optional[str] = "private"
+    source_site: Optional[str] = "autotrader"
+
+class VehicleListingResponse(BaseModel):
+    id: int
+    listing_id_external: Optional[str] = None
+    title: str
+    year: Optional[int] = None
+    make: Optional[str] = None
+    model: Optional[str] = None
+    trim: Optional[str] = None
+    price: Optional[float] = None
+    mileage: Optional[int] = None
+    listing_url: str
+    photo_url: Optional[str] = None
+    features: Optional[List[str]] = Field(default_factory=list)
+    location: Optional[str] = None
+    seller_type: Optional[str] = None
+    source_site: Optional[str] = None
+    created_at: datetime
+    updated_at: datetime
+    last_scraped_at: datetime
+    is_active: bool
+
+    @field_validator("features", mode="before")
+    @classmethod
+    def _decode_features_json(cls, value):
+        # features is persisted as a JSON-encoded string (see routes.py);
+        # decode it so ORM rows validate cleanly against List[str].
+        if isinstance(value, str):
+            import json
+            return json.loads(value) if value else []
+        return value
+
+    class Config:
+        from_attributes = True
+
+class SearchFilters(BaseModel):
+    make: Optional[str] = None
+    model: Optional[str] = None
+    min_year: Optional[int] = None
+    max_year: Optional[int] = None
+    min_price: Optional[float] = None
+    max_price: Optional[float] = None
+    max_mileage: Optional[int] = None
+    location: Optional[str] = None
+    seller_type: Optional[str] = None
+    source_site: Optional[str] = None
+    is_active: Optional[bool] = True
diff --git a/stealth_utils.py b/stealth_utils.py
deleted file mode 100644
index e956687..0000000
--- a/stealth_utils.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import logging
-
-async def apply_stealth_js(page):
-    """
-    Applies various JavaScript injections to make Playwright less detectable.
-    """
-    try:
-        # Pass the User-Agent test (though Playwright usually handles this well)
-        # user_agent = await page.evaluate("() => navigator.userAgent")
-        # await page.set_extra_http_headers({'User-Agent': user_agent.replace("HeadlessChrome", "Chrome")}) # Example
-
-        # Pass the WebGL test
-        await page.add_init_script("(() => { const getParameter = WebGLRenderingContext.prototype.getParameter; WebGLRenderingContext.prototype.getParameter = function(parameter) { if (parameter === 37445) { return 'Intel Open Source Technology Center'; } if (parameter === 37446) { return 'Mesa DRI Intel(R) Ivybridge Mobile '; } return getParameter(parameter); }; })()")
-
-        # Pass the Chrome test
-        await page.add_init_script("(() => { Object.defineProperty(navigator, 'webdriver', { get: () => false }); })()")
-        await page.add_init_script("(() => { window.chrome = { runtime: {}, loadTimes: function(){}, csi: function(){} }; })()")
-
-        # Pass the Permissions test
-        await page.add_init_script("(() => { const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) => ( parameters.name === 'notifications' ? 
Promise.resolve({ state: Notification.permission }) : originalQuery(parameters) ); })()") - - # Pass the Plugins Length test - await page.add_init_script("(() => { Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] }); })()") - - # Pass the Languages test - await page.add_init_script("(() => { Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); })()") - - logging.info("Applied JavaScript stealth techniques from stealth_utils.") - except Exception as e: - logging.error(f"Error applying stealth JS from stealth_utils: {e}", exc_info=True)