Skip to content

Add an option not to move the payload into a subdirectory on bag create #67

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
47 changes: 30 additions & 17 deletions bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import tempfile
from datetime import date
from os import listdir
from os.path import abspath, isdir, isfile, join
from os.path import abspath, isdir, isfile, islink, join

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -75,7 +75,8 @@
BOM = BOM.decode('utf-8')


def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
def make_bag(bag_dir, bag_info=None, processes=1, checksum=None,
move_payload=True):
"""
Convert a given directory into a bag. You can pass in arbitrary
key/value pairs to put into the bag-info.txt metadata file as
Expand Down Expand Up @@ -107,24 +108,33 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
LOGGER.error("The following files do not have read permissions: \n%s", unreadable_files)
raise BagError("Read permissions are required to calculate file fixities.")
else:
LOGGER.info("creating data dir")
if move_payload:

cwd = os.getcwd()
temp_data = tempfile.mkdtemp(dir=cwd)
LOGGER.info("creating data dir")

for f in os.listdir('.'):
if os.path.abspath(f) == temp_data:
continue
new_f = os.path.join(temp_data, f)
LOGGER.info("moving %s to %s", f, new_f)
os.rename(f, new_f)
cwd = os.getcwd()
temp_data = tempfile.mkdtemp(dir=cwd)

LOGGER.info("moving %s to %s", temp_data, 'data')
os.rename(temp_data, 'data')
for f in os.listdir('.'):
if os.path.abspath(f) == temp_data:
continue
new_f = os.path.join(temp_data, f)
LOGGER.info("moving %s to %s", f, new_f)
os.rename(f, new_f)

LOGGER.info("moving %s to %s", temp_data, 'data')
os.rename(temp_data, 'data')

# permissions for the payload directory should match
# those of the original directory
os.chmod('data', os.stat(cwd).st_mode)

else:

# permissions for the payload directory should match those of the
# original directory
os.chmod('data', os.stat(cwd).st_mode)
if not (isdir('data') and not islink('data')):
raise BagError("Bag directory must have the payload subdirectory.")
if os.listdir('.') != ['data']:
raise BagError("Bag directory must not have any entries other then the payload subdirectory.")

for c in checksum:
LOGGER.info("writing manifest-%s.txt", c)
Expand Down Expand Up @@ -930,6 +940,8 @@ def _make_parser():
help='Generate SHA-256 manifest when creating a bag')
parser.add_argument('--sha512', action='append_const', dest='checksum', const='sha512',
help='Generate SHA-512 manifest when creating a bag')
parser.add_argument('--no-move-payload', action='store_false', dest='move_payload',
help='Do not move the payload in a subdirectory of the bag directory, assume the payload directory to be already set up')

for header in STANDARD_BAG_INFO_HEADERS:
parser.add_argument('--%s' % header.lower(), type=str,
Expand Down Expand Up @@ -983,7 +995,8 @@ def main():
try:
make_bag(bag_dir, bag_info=parser.bag_info,
processes=args.processes,
checksum=args.checksum)
checksum=args.checksum,
move_payload=args.move_payload)
except Exception as exc:
LOGGER.error("Failed to create bag in %s: %s", bag_dir, exc, exc_info=True)
rc = 1
Expand Down
65 changes: 65 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,5 +570,70 @@ def test_unicode_in_tags(self):
bag = bagit.Bag(self.tmpdir)
self.assertEqual(bag.info['test'], '♡')

class TestBagNoMovePayload(unittest.TestCase):
    """Exercise ``bagit.make_bag`` called with ``move_payload=False``.

    Each test starts from a fresh temporary directory whose only entry is
    a ``data`` subdirectory holding a copy of ``test-data``.
    """

    # NOTE(review): the '[email protected]' literals below look like address
    # obfuscation introduced when this page was captured; confirm the real
    # value, and the expected checksums that depend on it, against test.py.

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        shutil.copytree('test-data', os.path.join(self.tmpdir, 'data'))

    def tearDown(self):
        if not os.path.isdir(self.tmpdir):
            return
        shutil.rmtree(self.tmpdir)

    def _read_bag_file(self, name):
        """Assert that *name* is a regular file in the bag and return its text."""
        self.assertTrue(os.path.isfile(j(self.tmpdir, name)))
        with open(j(self.tmpdir, name)) as handle:
            return handle.read()

    def _assert_each_in(self, fragments, text):
        """Assert that every string in *fragments* occurs in *text*."""
        for fragment in fragments:
            self.assertTrue(fragment in text)

    def test_make_bag(self):
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '[email protected]'}
        bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False)

        # the pre-existing payload directory must still be in place
        self.assertTrue(os.path.isdir(j(self.tmpdir, 'data')))

        # bagit.txt declares the version and the tag file encoding
        self._assert_each_in(
            (
                'BagIt-Version: 0.97',
                'Tag-File-Character-Encoding: UTF-8',
            ),
            self._read_bag_file('bagit.txt'),
        )

        # the payload manifest lists every file below data/
        self._assert_each_in(
            (
                '8e2af7a0143c7b8f4de0b3fc90f27354 data/README',
                '9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg',
                '6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg',
                '38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg',
                '5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg',
            ),
            self._read_bag_file('manifest-md5.txt'),
        )

        # bag-info.txt carries the supplied and the generated metadata
        self._assert_each_in(
            (
                'Contact-Email: [email protected]',
                'Bagging-Date: 1970-01-01',
                'Payload-Oxum: 991765.5',
                'Bag-Software-Agent: bagit.py v1.5.4 <http://github.com/libraryofcongress/bagit-python>',
            ),
            self._read_bag_file('bag-info.txt'),
        )

        # the tag manifest covers the three tag files checked above
        self._assert_each_in(
            (
                '9e5ad981e0d29adc278f6a294b8c2aca bagit.txt',
                'a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt',
                'bfe59ad8af1a227d27c191b4178c399f bag-info.txt',
            ),
            self._read_bag_file('tagmanifest-md5.txt'),
        )

    def test_make_bag_err_no_payload(self):
        # with the payload directory renamed away, there is no 'data' entry
        os.rename(j(self.tmpdir, 'data'), j(self.tmpdir, 'foo'))
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '[email protected]'}
        self.assertRaises(bagit.BagError, bagit.make_bag, self.tmpdir,
                          bag_info=info, move_payload=False)

    def test_make_bag_err_spurious_files(self):
        # an empty extra file next to 'data' must make bag creation fail
        open(j(self.tmpdir, 'bogus'), 'wt').close()
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '[email protected]'}
        self.assertRaises(bagit.BagError, bagit.make_bag, self.tmpdir,
                          bag_info=info, move_payload=False)

if __name__ == '__main__':
unittest.main()