Skip to content

Commit

Permalink
Move _WindowsAdapter to _utils module
Browse files Browse the repository at this point in the history
  • Loading branch information
elacuesta committed Jun 21, 2024
1 parent 4674ba8 commit 60a4e06
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 38 deletions.
45 changes: 43 additions & 2 deletions scrapy_playwright/_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import asyncio
import concurrent
import logging
import platform
import threading
from typing import Awaitable, Iterator, Optional, Tuple, Union

import scrapy
from playwright.async_api import Error, Page, Request, Response
from scrapy import Spider
from scrapy.http.headers import Headers
from scrapy.utils.python import to_unicode
from twisted.internet.defer import Deferred
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding


Expand Down Expand Up @@ -53,7 +58,7 @@ def _is_safe_close_error(error: Error) -> bool:

async def _get_page_content(
page: Page,
spider: Spider,
spider: scrapy.Spider,
context_name: str,
scrapy_request_url: str,
scrapy_request_method: str,
Expand Down Expand Up @@ -89,3 +94,39 @@ async def _get_header_value(
return await resource.header_value(header_name)
except Exception:
return None


if platform.system() == "Windows":

    class _WindowsAdapter:
        """Redirect coroutines to an asyncio event loop running in another thread.

        Running a dedicated loop makes it possible to use a ProactorEventLoop,
        which is supported by Playwright on Windows.
        """

        # Class-level state shared by every caller; both are created lazily
        # by get_event_loop().
        loop = None
        thread = None

        @classmethod
        def get_event_loop(cls) -> asyncio.AbstractEventLoop:
            """Return the shared event loop, creating and starting it if needed.

            On first use a loop is created from a WindowsProactorEventLoopPolicy
            and set as the current event loop; a daemon thread is then started
            that runs the loop forever. Once the thread exists, later calls just
            return the stored loop.
            """
            if cls.thread is None:
                if cls.loop is None:
                    policy = asyncio.WindowsProactorEventLoopPolicy()  # type: ignore
                    cls.loop = policy.new_event_loop()
                    asyncio.set_event_loop(cls.loop)
                if not cls.loop.is_running():
                    # Daemon thread: it must not keep the process alive on exit.
                    cls.thread = threading.Thread(target=cls.loop.run_forever, daemon=True)
                    cls.thread.start()
            return cls.loop

        @classmethod
        async def get_result(cls, o) -> object:
            """Run the coroutine ``o`` on the shared loop and return its result.

            The coroutine is submitted with ``asyncio.run_coroutine_threadsafe``
            and the value produced by the coroutine (not the future wrapping it)
            is returned; an exception raised by the coroutine propagates to the
            caller.

            NOTE(review): ``.result()`` blocks the calling thread until the
            coroutine finishes, even though this method is itself a coroutine.
            """
            return asyncio.run_coroutine_threadsafe(coro=o, loop=cls.get_event_loop()).result()

    def deferred_from_coro(o) -> Deferred:
        """Return ``o`` as a Deferred, routing coroutines through _WindowsAdapter.

        Objects that are already Deferred instances are returned unchanged.
        """
        if isinstance(o, Deferred):
            return o
        return scrapy.utils.defer.deferred_from_coro(_WindowsAdapter.get_result(o))

else:
    # Outside Windows no adapter is needed: use Scrapy's implementation as is.
    deferred_from_coro = scrapy.utils.defer.deferred_from_coro
38 changes: 3 additions & 35 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import asyncio
import concurrent
import logging
import platform
from contextlib import suppress
from dataclasses import dataclass
from ipaddress import ip_address
Expand All @@ -27,7 +25,6 @@
from scrapy.http.headers import Headers
from scrapy.responsetypes import responsetypes
from scrapy.settings import Settings
from scrapy.utils.defer import deferred_from_coro as deferred_from_coro_default
from scrapy.utils.misc import load_object
from scrapy.utils.reactor import verify_installed_reactor
from twisted.internet.defer import Deferred, inlineCallbacks
Expand All @@ -40,48 +37,19 @@
_get_page_content,
_is_safe_close_error,
_maybe_await,
deferred_from_coro,
)


__all__ = ["ScrapyPlaywrightDownloadHandler"]


if platform.system() == "Windows":
    import threading

    class _WindowsAdapter:
        # Runs coroutines on a separate asyncio event loop that lives in its
        # own thread. Both attributes are class-level and created lazily by
        # get_event_loop().
        loop = None
        thread = None

        @classmethod
        def get_event_loop(cls) -> asyncio.AbstractEventLoop:
            # Create the loop (from a WindowsProactorEventLoopPolicy) and the
            # daemon thread running it on first use; afterwards return the
            # stored loop.
            if cls.thread is None:
                if cls.loop is None:
                    policy = asyncio.WindowsProactorEventLoopPolicy()  # type: ignore
                    cls.loop = policy.new_event_loop()
                    asyncio.set_event_loop(cls.loop)
                if not cls.loop.is_running():
                    cls.thread = threading.Thread(target=cls.loop.run_forever, daemon=True)
                    cls.thread.start()
            return cls.loop

        @classmethod
        async def get_result(cls, o) -> concurrent.futures.Future:
            # Submit `o` to the adapter's loop and wait for it to finish.
            # NOTE(review): the value returned is `.result()` of the future,
            # i.e. the coroutine's result rather than a Future as annotated.
            return asyncio.run_coroutine_threadsafe(coro=o, loop=cls.get_event_loop()).result()

    def deferred_from_coro(o) -> Deferred:
        # Deferred objects pass through untouched; anything else is run via
        # _WindowsAdapter and wrapped with Scrapy's default implementation.
        if isinstance(o, Deferred):
            return o
        return deferred_from_coro_default(_WindowsAdapter.get_result(o))

else:
    # Outside Windows, use Scrapy's implementation directly.
    deferred_from_coro = deferred_from_coro_default


PlaywrightHandler = TypeVar("PlaywrightHandler", bound="ScrapyPlaywrightDownloadHandler")


logger = logging.getLogger("scrapy-playwright")


DEFAULT_BROWSER_TYPE = "chromium"
DEFAULT_CONTEXT_NAME = "default"
PERSISTENT_CONTEXT_PATH_KEY = "user_data_dir"
Expand Down
3 changes: 2 additions & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@


if platform.system() == "Windows":
from scrapy_playwright.handler import _WindowsAdapter
from scrapy_playwright._utils import _WindowsAdapter

def allow_windows(test_method):
"""Wrap tests with the _WindowsAdapter class on Windows."""
if not inspect.iscoroutinefunction(test_method):
raise RuntimeError(f"{test_method} must be an async def method")

Expand Down

0 comments on commit 60a4e06

Please sign in to comment.