From d74efe01e7fc4eb4f14f4e4951ef5b85422519be Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 10 Sep 2016 11:00:51 +0100 Subject: [PATCH 1/7] Add option move_payload to make_bag(). --- bagit.py | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/bagit.py b/bagit.py index 7f9243c..d0d62fb 100755 --- a/bagit.py +++ b/bagit.py @@ -43,7 +43,7 @@ import tempfile from datetime import date from os import listdir -from os.path import abspath, isdir, isfile, join +from os.path import abspath, isdir, isfile, islink, join LOGGER = logging.getLogger(__name__) @@ -73,7 +73,8 @@ BOM = BOM.decode('utf-8') -def make_bag(bag_dir, bag_info=None, processes=1, checksum=None): +def make_bag(bag_dir, bag_info=None, processes=1, checksum=None, + move_payload=True): """ Convert a given directory into a bag. You can pass in arbitrary key/value pairs to put into the bag-info.txt metadata file as @@ -105,24 +106,32 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None): LOGGER.error("The following files do not have read permissions: \n%s", unreadable_files) raise BagError("Read permissions are required to calculate file fixities.") else: - LOGGER.info("creating data dir") + if move_payload: - cwd = os.getcwd() - temp_data = tempfile.mkdtemp(dir=cwd) + LOGGER.info("creating data dir") - for f in os.listdir('.'): - if os.path.abspath(f) == temp_data: - continue - new_f = os.path.join(temp_data, f) - LOGGER.info("moving %s to %s", f, new_f) - os.rename(f, new_f) + cwd = os.getcwd() + temp_data = tempfile.mkdtemp(dir=cwd) - LOGGER.info("moving %s to %s", temp_data, 'data') - os.rename(temp_data, 'data') + for f in os.listdir('.'): + if os.path.abspath(f) == temp_data: + continue + new_f = os.path.join(temp_data, f) + LOGGER.info("moving %s to %s", f, new_f) + os.rename(f, new_f) + + LOGGER.info("moving %s to %s", temp_data, 'data') + os.rename(temp_data, 'data') + + # permissions for the payload directory should match + # those of the original directory + os.chmod('data', os.stat(cwd).st_mode) + + else: - # permissions for the payload directory should match those of the - # original directory - os.chmod('data', os.stat(cwd).st_mode) + if not (os.listdir('.') == ['data'] and + isdir('data') and not islink('data')): + raise BagError("Bag directory must have the payload subdirectory and no other entries.") for c in checksum: LOGGER.info("writing manifest-%s.txt", c) From bfeff08ca86e0400230876ceb025171c8b59b0a8 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 10 Sep 2016 13:43:08 +0100 Subject: [PATCH 2/7] Add a command line argument --no-move-payload. --- bagit.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bagit.py b/bagit.py index d0d62fb..c08bc07 100755 --- a/bagit.py +++ b/bagit.py @@ -925,6 +925,8 @@ def _make_parser(): help='Generate SHA-256 manifest when creating a bag') parser.add_argument('--sha512', action='append_const', dest='checksum', const='sha512', help='Generate SHA-512 manifest when creating a bag') + parser.add_argument('--no-move-payload', action='store_false', dest='move_payload', + help='Do not move the payload in a subdirectory of the bag directory, assume the payload directory to be already set up') for header in STANDARD_BAG_INFO_HEADERS: parser.add_argument('--%s' % header.lower(), type=str, @@ -978,7 +980,8 @@ def main(): try: make_bag(bag_dir, bag_info=parser.bag_info, processes=args.processes, - checksum=args.checksum) + checksum=args.checksum, + move_payload=args.move_payload) except Exception as exc: LOGGER.error("Failed to create bag in %s: %s", bag_dir, exc, exc_info=True) rc = 1 From 6c90a47916ed1812f703fdb5093a14a646363098 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 12 Sep 2016 07:25:50 -0600 Subject: [PATCH 3/7] Add a test for make_bag() with move_payload set to False. --- test.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test.py b/test.py index f9d6efd..610ea93 100644 --- a/test.py +++ b/test.py @@ -539,5 +539,61 @@ def test_unicode_in_tags(self): bag = bagit.Bag(self.tmpdir) self.assertEqual(bag.info['test'], '♡') +class TestBagNoMovePayload(unittest.TestCase): + """Same test as test_make_bag (test.TestBag), but with the payload + in the right place in the data directory already from the + beginning and setting move_payload=False. + """ + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + datadir = os.path.join(self.tmpdir, 'data') + shutil.copytree('test-data', datadir) + + def tearDown(self): + if os.path.isdir(self.tmpdir): + shutil.rmtree(self.tmpdir) + + def test_make_bag(self): + info = {'Bagging-Date': '1970-01-01', 'Contact-Email': 'ehs@pobox.com'} + bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False) + + # data dir should've been created + self.assertTrue(os.path.isdir(j(self.tmpdir, 'data'))) + + # check bagit.txt + self.assertTrue(os.path.isfile(j(self.tmpdir, 'bagit.txt'))) + with open(j(self.tmpdir, 'bagit.txt')) as b: + bagit_txt = b.read() + self.assertTrue('BagIt-Version: 0.97' in bagit_txt) + self.assertTrue('Tag-File-Character-Encoding: UTF-8' in bagit_txt) + + # check manifest + self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-md5.txt'))) + with open(j(self.tmpdir, 'manifest-md5.txt')) as m: + manifest_txt = m.read() + self.assertTrue('8e2af7a0143c7b8f4de0b3fc90f27354 data/README' in manifest_txt) + self.assertTrue('9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg' in manifest_txt) + self.assertTrue('6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg' in manifest_txt) + self.assertTrue('38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg' in manifest_txt) + self.assertTrue('5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg' in manifest_txt) + + # check bag-info.txt + self.assertTrue(os.path.isfile(j(self.tmpdir, 'bag-info.txt'))) + with open(j(self.tmpdir, 'bag-info.txt')) as bi: + bag_info_txt = bi.read() + self.assertTrue('Contact-Email: ehs@pobox.com' in bag_info_txt) + self.assertTrue('Bagging-Date: 1970-01-01' in bag_info_txt) + self.assertTrue('Payload-Oxum: 991765.5' in bag_info_txt) + self.assertTrue('Bag-Software-Agent: bagit.py ' in bag_info_txt) + + # check tagmanifest-md5.txt + self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt'))) + with open(j(self.tmpdir, 'tagmanifest-md5.txt')) as tm: + tagmanifest_txt = tm.read() + self.assertTrue('9e5ad981e0d29adc278f6a294b8c2aca bagit.txt' in tagmanifest_txt) + self.assertTrue('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt' in tagmanifest_txt) + self.assertTrue('6a5090e27cb29d5dda8a0142fbbdf37e bag-info.txt' in tagmanifest_txt) + if __name__ == '__main__': unittest.main() From 9d96403b8f871e8db5d29053598fece2f588fbb3 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 12 Sep 2016 20:42:43 -0600 Subject: [PATCH 4/7] Add tests for error conditions with move_payload set to False. --- test.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index 610ea93..5880b45 100644 --- a/test.py +++ b/test.py @@ -540,9 +540,7 @@ def test_unicode_in_tags(self): self.assertEqual(bag.info['test'], '♡') class TestBagNoMovePayload(unittest.TestCase): - """Same test as test_make_bag (test.TestBag), but with the payload - in the right place in the data directory already from the - beginning and setting move_payload=False. + """Tests for creating a bag with move_payload=False. """ def setUp(self): @@ -595,5 +593,18 @@ def test_make_bag(self): self.assertTrue('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt' in tagmanifest_txt) self.assertTrue('6a5090e27cb29d5dda8a0142fbbdf37e bag-info.txt' in tagmanifest_txt) + def test_make_bag_err_no_payload(self): + os.rename(j(self.tmpdir, 'data'), j(self.tmpdir, 'foo')) + info = {'Bagging-Date': '1970-01-01', 'Contact-Email': 'ehs@pobox.com'} + with self.assertRaises(bagit.BagError): + bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False) + + def test_make_bag_err_spurious_files(self): + with open(j(self.tmpdir, 'bogus'), 'wt'): + pass + info = {'Bagging-Date': '1970-01-01', 'Contact-Email': 'ehs@pobox.com'} + with self.assertRaises(bagit.BagError): + bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False) + if __name__ == '__main__': unittest.main() From dac6f4b737ca819da3172aafc3fbe1a738986673 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 13 Sep 2016 07:42:52 -0600 Subject: [PATCH 5/7] Move different error conditions in separate if statements. --- bagit.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bagit.py b/bagit.py index c08bc07..2f7c25a 100755 --- a/bagit.py +++ b/bagit.py @@ -129,9 +129,10 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None, else: - if not (os.listdir('.') == ['data'] and - isdir('data') and not islink('data')): - raise BagError("Bag directory must have the payload subdirectory and no other entries.") + if not (isdir('data') and not islink('data')): + raise BagError("Bag directory must have the payload subdirectory.") + if os.listdir('.') != ['data']: + raise BagError("Bag directory must not have any entries other then the payload subdirectory.") for c in checksum: LOGGER.info("writing manifest-%s.txt", c) From 42bf5f21994cd05a8b310aae0d6ccd94ee8684b4 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 20 Sep 2016 12:36:04 +0200 Subject: [PATCH 6/7] Fix test: assertRaises() does not return a context manager with Python 2.6. --- test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test.py b/test.py index 5880b45..8c5b628 100644 --- a/test.py +++ b/test.py @@ -596,15 +596,13 @@ def test_make_bag(self): def test_make_bag_err_no_payload(self): os.rename(j(self.tmpdir, 'data'), j(self.tmpdir, 'foo')) info = {'Bagging-Date': '1970-01-01', 'Contact-Email': 'ehs@pobox.com'} - with self.assertRaises(bagit.BagError): - bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False) + self.assertRaises(bagit.BagError, bagit.make_bag, self.tmpdir, bag_info=info, move_payload=False) def test_make_bag_err_spurious_files(self): with open(j(self.tmpdir, 'bogus'), 'wt'): pass info = {'Bagging-Date': '1970-01-01', 'Contact-Email': 'ehs@pobox.com'} - with self.assertRaises(bagit.BagError): - bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False) + self.assertRaises(bagit.BagError, bagit.make_bag, self.tmpdir, bag_info=info, move_payload=False) if __name__ == '__main__': unittest.main() From 81f57f4713c536c3bfc4d0a88f5f5c84e88248ff Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 22 Oct 2016 20:41:42 +0200 Subject: [PATCH 7/7] Adapt test for including version in bag-info.txt --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 2cb714e..3f3eb8e 100644 --- a/test.py +++ b/test.py @@ -614,7 +614,7 @@ def test_make_bag(self): self.assertTrue('Contact-Email: ehs@pobox.com' in bag_info_txt) self.assertTrue('Bagging-Date: 1970-01-01' in bag_info_txt) self.assertTrue('Payload-Oxum: 991765.5' in bag_info_txt) - self.assertTrue('Bag-Software-Agent: bagit.py ' in bag_info_txt) + self.assertTrue('Bag-Software-Agent: bagit.py v1.5.4 ' in bag_info_txt) # check tagmanifest-md5.txt self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt'))) @@ -622,7 +622,7 @@ def test_make_bag(self): tagmanifest_txt = tm.read() self.assertTrue('9e5ad981e0d29adc278f6a294b8c2aca bagit.txt' in tagmanifest_txt) self.assertTrue('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt' in tagmanifest_txt) - self.assertTrue('6a5090e27cb29d5dda8a0142fbbdf37e bag-info.txt' in tagmanifest_txt) + self.assertTrue('bfe59ad8af1a227d27c191b4178c399f bag-info.txt' in tagmanifest_txt) def test_make_bag_err_no_payload(self): os.rename(j(self.tmpdir, 'data'), j(self.tmpdir, 'foo'))