diff --git a/mwscrape/scrape.py b/mwscrape/scrape.py index 8a373f6..ebba459 100644 --- a/mwscrape/scrape.py +++ b/mwscrape/scrape.py @@ -19,7 +19,7 @@ from urllib.parse import urlparse from urllib.parse import urlunparse from collections import namedtuple -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from multiprocessing import RLock from multiprocessing.pool import ThreadPool from contextlib import contextmanager @@ -336,7 +336,7 @@ def main(): site_host = session_doc["site"] scheme, host = scheme_and_host(site_host) db_name = session_doc["db_name"] - session_doc["resumed_at"] = datetime.utcnow().isoformat() + session_doc["resumed_at"] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() if args.start: start_page_name = args.start else: @@ -362,7 +362,7 @@ def main(): ) print("Starting session %s" % session_id) sessions_db[session_id] = { - "created_at": datetime.utcnow().isoformat(), + "created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(), "site": site_host, "db_name": db_name, "descending": descending, @@ -429,7 +429,7 @@ def recently_changed_pages(timestamp): elif args.changes_since or args.recent: if args.recent: recent_days = args.recent_days - changes_since = fmt_mw_tms(datetime.utcnow() + timedelta(days=-recent_days)) + changes_since = fmt_mw_tms(datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(days=-recent_days)) else: changes_since = args.changes_since.ljust(14, "0") print("Getting recent changes (since %s)" % changes_since) @@ -458,7 +458,7 @@ def update_session(title): with lock: session_doc = sessions_db[session_id] session_doc["last_page_name"] = title - session_doc["updated_at"] = datetime.utcnow().isoformat() + session_doc["updated_at"] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() sessions_db[session_id] = session_doc def process(page):