Skip to content
47 changes: 30 additions & 17 deletions bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import tempfile
from datetime import date
from os import listdir
from os.path import abspath, isdir, isfile, join
from os.path import abspath, isdir, isfile, islink, join

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -75,7 +75,8 @@
BOM = BOM.decode('utf-8')


def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
def make_bag(bag_dir, bag_info=None, processes=1, checksum=None,
move_payload=True):
"""
Convert a given directory into a bag. You can pass in arbitrary
key/value pairs to put into the bag-info.txt metadata file as
Expand Down Expand Up @@ -107,24 +108,33 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
LOGGER.error("The following files do not have read permissions: \n%s", unreadable_files)
raise BagError("Read permissions are required to calculate file fixities.")
else:
LOGGER.info("creating data dir")
if move_payload:

cwd = os.getcwd()
temp_data = tempfile.mkdtemp(dir=cwd)
LOGGER.info("creating data dir")

for f in os.listdir('.'):
if os.path.abspath(f) == temp_data:
continue
new_f = os.path.join(temp_data, f)
LOGGER.info("moving %s to %s", f, new_f)
os.rename(f, new_f)
cwd = os.getcwd()
temp_data = tempfile.mkdtemp(dir=cwd)

LOGGER.info("moving %s to %s", temp_data, 'data')
os.rename(temp_data, 'data')
for f in os.listdir('.'):
if os.path.abspath(f) == temp_data:
continue
new_f = os.path.join(temp_data, f)
LOGGER.info("moving %s to %s", f, new_f)
os.rename(f, new_f)

LOGGER.info("moving %s to %s", temp_data, 'data')
os.rename(temp_data, 'data')

# permissions for the payload directory should match
# those of the original directory
os.chmod('data', os.stat(cwd).st_mode)

else:

# permissions for the payload directory should match those of the
# original directory
os.chmod('data', os.stat(cwd).st_mode)
if not (isdir('data') and not islink('data')):
raise BagError("Bag directory must have the payload subdirectory.")
if os.listdir('.') != ['data']:
raise BagError("Bag directory must not have any entries other then the payload subdirectory.")

for c in checksum:
LOGGER.info("writing manifest-%s.txt", c)
Expand Down Expand Up @@ -930,6 +940,8 @@ def _make_parser():
help='Generate SHA-256 manifest when creating a bag')
parser.add_argument('--sha512', action='append_const', dest='checksum', const='sha512',
help='Generate SHA-512 manifest when creating a bag')
parser.add_argument('--no-move-payload', action='store_false', dest='move_payload',
help='Do not move the payload in a subdirectory of the bag directory, assume the payload directory to be already set up')

for header in STANDARD_BAG_INFO_HEADERS:
parser.add_argument('--%s' % header.lower(), type=str,
Expand Down Expand Up @@ -983,7 +995,8 @@ def main():
try:
make_bag(bag_dir, bag_info=parser.bag_info,
processes=args.processes,
checksum=args.checksum)
checksum=args.checksum,
move_payload=args.move_payload)
except Exception as exc:
LOGGER.error("Failed to create bag in %s: %s", bag_dir, exc, exc_info=True)
rc = 1
Expand Down
65 changes: 65 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,5 +570,70 @@ def test_unicode_in_tags(self):
bag = bagit.Bag(self.tmpdir)
self.assertEqual(bag.info['test'], '♡')

class TestBagNoMovePayload(unittest.TestCase):
    """Tests for creating a bag with move_payload=False.

    Every test starts from a fresh temporary bag directory whose only
    entry is a ``data`` subdirectory copied from ``test-data``, i.e. the
    payload is already in place before ``make_bag`` is called.
    """

    def setUp(self):
        """Create a temporary bag directory containing only ``data``."""
        self.tmpdir = tempfile.mkdtemp()
        datadir = os.path.join(self.tmpdir, 'data')
        shutil.copytree('test-data', datadir)

    def tearDown(self):
        """Remove the temporary bag directory if it still exists."""
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def test_make_bag(self):
        """Bagging in place writes the tag files next to the payload."""
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '[email protected]'}
        bagit.make_bag(self.tmpdir, bag_info=info, move_payload=False)

        # the payload dir set up beforehand must still be in place
        self.assertTrue(os.path.isdir(j(self.tmpdir, 'data')))

        # check bagit.txt
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'bagit.txt')))
        with open(j(self.tmpdir, 'bagit.txt')) as b:
            bagit_txt = b.read()
        # assertIn reports both operands on failure, unlike assertTrue(x in y)
        self.assertIn('BagIt-Version: 0.97', bagit_txt)
        self.assertIn('Tag-File-Character-Encoding: UTF-8', bagit_txt)

        # check manifest
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-md5.txt')))
        with open(j(self.tmpdir, 'manifest-md5.txt')) as m:
            manifest_txt = m.read()
        self.assertIn('8e2af7a0143c7b8f4de0b3fc90f27354 data/README', manifest_txt)
        self.assertIn('9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg', manifest_txt)
        self.assertIn('6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg', manifest_txt)
        self.assertIn('38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg', manifest_txt)
        self.assertIn('5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg', manifest_txt)

        # check bag-info.txt
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'bag-info.txt')))
        with open(j(self.tmpdir, 'bag-info.txt')) as bi:
            bag_info_txt = bi.read()
        self.assertIn('Contact-Email: [email protected]', bag_info_txt)
        self.assertIn('Bagging-Date: 1970-01-01', bag_info_txt)
        self.assertIn('Payload-Oxum: 991765.5', bag_info_txt)
        self.assertIn('Bag-Software-Agent: bagit.py v1.5.4 <http://github.com/libraryofcongress/bagit-python>', bag_info_txt)

        # check tagmanifest-md5.txt
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt')))
        with open(j(self.tmpdir, 'tagmanifest-md5.txt')) as tm:
            tagmanifest_txt = tm.read()
        self.assertIn('9e5ad981e0d29adc278f6a294b8c2aca bagit.txt', tagmanifest_txt)
        self.assertIn('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt', tagmanifest_txt)
        self.assertIn('bfe59ad8af1a227d27c191b4178c399f bag-info.txt', tagmanifest_txt)

    def test_make_bag_err_no_payload(self):
        """A bag directory without a ``data`` subdirectory is rejected."""
        os.rename(j(self.tmpdir, 'data'), j(self.tmpdir, 'foo'))
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '[email protected]'}
        self.assertRaises(bagit.BagError, bagit.make_bag, self.tmpdir, bag_info=info, move_payload=False)

    def test_make_bag_err_spurious_files(self):
        """Any entry besides ``data`` in the bag directory is rejected."""
        # create an empty extra file alongside the payload directory
        with open(j(self.tmpdir, 'bogus'), 'wt'):
            pass
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '[email protected]'}
        self.assertRaises(bagit.BagError, bagit.make_bag, self.tmpdir, bag_info=info, move_payload=False)

if __name__ == '__main__':
unittest.main()