-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess.py
More file actions
executable file
·50 lines (42 loc) · 1.88 KB
/
process.py
File metadata and controls
executable file
·50 lines (42 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
import sys
import argparse
from condm.controller import ContentdmProcessor
from existdb.controller import ExistProcessor
# Allowed repo names are 'cdm' and 'exist'.
#
# command line arguments
# process.py <repo> <collection> <source_file or directory> <output_directory> [-d | --dry-run]
#
# example1 ./process.py cdm manuscripts archives_manuscripts.xml archives_manuscripts --dry-run
# example2 ./process.py exist collegian collegian collegian
#
# example1 uses a source file; example2 uses a source directory.
#
# NOTE: You must provide an empty output directory. E.g. saf/archives_manuscripts.
# This directory must be empty.
#
# See processor classes for more details.
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: a parser with four positional arguments
        (``repo``, ``collection``, ``source_file``, ``saf_dir``) and the
        optional ``-d/--dry-run`` flag.
    """
    parser = argparse.ArgumentParser(
        description='Process exported collection data to saf. The supported repositories are CONTENTdm and the WU eXist-db METS/ALTO collections.')
    # Restricting repo to the names the dispatcher handles makes argparse
    # reject a typo (or the old 'exist-db' spelling) with a usage error
    # instead of letting the script exit successfully having done nothing.
    parser.add_argument('repo', metavar='repo', type=str,
                        choices=('cdm', 'exist'),
                        help='the repository name (cdm | exist)')
    parser.add_argument('collection', metavar='collection', type=str,
                        help='the repository collection name')
    parser.add_argument('source_file', metavar='source_file', type=str,
                        help='the exported xml data source')
    parser.add_argument('saf_dir', metavar='saf_directory', type=str,
                        help='the parent saf target directory')
    parser.add_argument("-d", "--dry-run", action="store_true",
                        help="Dry run displays collection analytics only. No data is processed.")
    return parser


def main(argv=None):
    """Parse the command line and run the processor for the chosen repo.

    Args:
        argv: list of argument strings, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse (status 2) when the arguments are
            invalid, including an unsupported ``repo`` value.
    """
    args = build_parser().parse_args(argv)

    # ``store_true`` already yields a real bool, so args.dry_run is passed
    # straight through to the processors.
    if args.repo == 'cdm':
        controller = ContentdmProcessor(args.collection, args.source_file,
                                        args.saf_dir, args.dry_run)
        controller.process_collections()
    elif args.repo == 'exist':
        controller = ExistProcessor(args.collection, args.source_file,
                                    args.saf_dir, args.dry_run)
        controller.process_records()


if __name__ == '__main__':
    main()