From 5f4c81e78727e9bd18ce4017e721ffe1332c3048 Mon Sep 17 00:00:00 2001 From: Garrett Boast Date: Fri, 29 Sep 2017 20:15:15 -0400 Subject: [PATCH 1/2] Implemented --listonly option in dumpgenerator.py The --listonly option skips the dump step of page and image retrievals, potentially satisfying the enhancement request at #272 --- dumpgenerator.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index a045ace5..e6c68ad6 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1303,6 +1303,8 @@ def getParameters(params=[]): help='store only the current version of pages') groupDownload.add_argument( '--images', action='store_true', help="generates an image dump") + groupDownload.add_argument( + '--listonly', help="only retrieves lists and doesn't download the content") groupDownload.add_argument( '--namespaces', metavar="1,2,3", @@ -1488,6 +1490,7 @@ def getParameters(params=[]): 'images': args.images, 'logs': False, 'xml': args.xml, + 'listonly': args.listonly, 'namespaces': namespaces, 'exnamespaces': exnamespaces, 'path': args.path and os.path.normpath(args.path) or '', @@ -1648,19 +1651,24 @@ def createNewDump(config={}, other={}): if config['xml']: getPageTitles(config=config, session=other['session']) titles=readTitles(config) - generateXMLDump(config=config, titles=titles, session=other['session']) - checkXMLIntegrity( - config=config, - titles=titles, - session=other['session']) + if not config['listonly']: + generateXMLDump( + config=config, + titles=titles, + session=other['session']) + checkXMLIntegrity( + config=config, + titles=titles, + session=other['session']) if config['images']: images += getImageNames(config=config, session=other['session']) saveImageNames(config=config, images=images, session=other['session']) - generateImageDump( - config=config, - other=other, - images=images, - session=other['session']) + if not config['listonly']: + generateImageDump( + 
config=config, + other=other, + images=images, + session=other['session']) if config['logs']: saveLogs(config=config, session=other['session']) From 93ec2e563697d4a29b917fe76f07da61f78b0710 Mon Sep 17 00:00:00 2001 From: Garrett Boast Date: Fri, 29 Sep 2017 21:05:25 -0400 Subject: [PATCH 2/2] http://skilledtests.com/wiki/ no longer exists http://skilledtests.com/wiki/ appears offline and is causing tests to fail. It looks domain-name related. --- testing/test_dumpgenerator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/testing/test_dumpgenerator.py b/testing/test_dumpgenerator.py index 13916598..fdc946da 100644 --- a/testing/test_dumpgenerator.py +++ b/testing/test_dumpgenerator.py @@ -63,7 +63,7 @@ def test_getImages(self): # Alone wikis #['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'], ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'Archive-is 2013-07-02 17-05-40.png'], - ['http://skilledtests.com/wiki/index.php', 'http://skilledtests.com/wiki/api.php', u'Benham\'s disc (animated).gif'], + #['http://skilledtests.com/wiki/index.php', 'http://skilledtests.com/wiki/api.php', u'Benham\'s disc (animated).gif'], # Editthis wikifarm # It has a page view limit @@ -147,7 +147,7 @@ def test_getPageTitles(self): tests = [ # Alone wikis ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'April Fools\' Day'], - ['http://skilledtests.com/wiki/index.php', 'http://skilledtests.com/wiki/api.php', u'Conway\'s Game of Life'], + #['http://skilledtests.com/wiki/index.php', 'http://skilledtests.com/wiki/api.php', u'Conway\'s Game of Life'], # Test old allpages API behaviour #['http://wiki.damirsystems.com/index.php', 'http://wiki.damirsystems.com/api.php', 'SQL Server Tips'], @@ -206,7 +206,7 @@ def test_getWikiEngine(self): tests = [ ['https://www.dokuwiki.org', 'DokuWiki'], #['http://wiki.openwrt.org', 'DokuWiki'], - 
['http://skilledtests.com/wiki/', 'MediaWiki'], + #['http://skilledtests.com/wiki/', 'MediaWiki'], #['http://moinmo.in', 'MoinMoin'], ['https://wiki.debian.org', 'MoinMoin'], ['http://twiki.org/cgi-bin/view/', 'TWiki'], @@ -274,7 +274,7 @@ def test_mwGetAPIAndIndex(self): tests = [ # Alone wikis ['http://archiveteam.org', 'http://archiveteam.org/api.php', 'http://archiveteam.org/index.php'], - ['http://skilledtests.com/wiki/', 'http://skilledtests.com/wiki/api.php', 'http://skilledtests.com/wiki/index.php'], + #['http://skilledtests.com/wiki/', 'http://skilledtests.com/wiki/api.php', 'http://skilledtests.com/wiki/index.php'], # Editthis wikifarm # It has a page view limit