# import.py
# NOTE: the upstream repository was archived by its owner on Mar 7, 2021 and is now read-only.
import pymongo
import sys, re
import json
# (host, port) of the MongoDB server; mongo_import passes both to pymongo.MongoClient.
mongo = ("localhost", 27017)
def iter_json(filename):
    """
    Generator to read a JSON file line by line,
    useful to read big files without eating up RAM.

    Each line is expected to hold one JSON document. The file may be
    newline-delimited JSON or a JSON array written with one element per
    line: surrounding whitespace, the enclosing "[" / "]" and the ","
    separators are removed from both ends of the line before parsing.
    Lines that are empty after this cleaning are skipped.

    Because brackets are stripped from the line ends, a line whose
    document is itself a JSON array is not supported.

    filename: path of the file to read
    yields: the object decoded from each non-empty line
    raises: ValueError if a cleaned line is not valid JSON
    """
    # A single character set makes the cleaning independent of the order
    # in which brackets, commas and line endings appear, e.g.
    # '{"a": 1}]\n' or '{"a": 1},\r\n'.
    wrapper_chars = " \t\r\n[],"
    with open(filename, "r") as f:
        for jsonline in f:
            document = jsonline.strip(wrapper_chars)
            # Lines holding only "[", "]", "," or whitespace carry no document.
            if document:
                yield json.loads(document)
def get_jsonlines(filename, n = 5):
    """
    Return a list with the first n documents from a JSON file.

    filename: name of the source file
    n: maximum number of documents to return; values below 1 give
       an empty list
    returns: a list of at most n decoded documents (shorter when the
             file holds fewer than n documents)
    """
    # Local import: itertools is not among the module-level imports.
    from itertools import islice

    # islice stops after n items, so the rest of the file is never parsed,
    # and a file with n documents or fewer yields a (shorter) list instead
    # of falling off the end of the function and returning None.
    return list(islice(iter_json(filename), max(n, 0)))
def mongo_import(db_name, col_name, filename, overwrite = True, v = (False, 0)):
    """
    Imports JSON documents from a file to MongoDB.

    db_name: name of the database
    col_name: name of the collection
    filename: name of the source file
    overwrite: boolean; when true the collection is dropped before
               the import starts
    v: verbose, tuple containing a boolean and an int representing
       the interval (in documents) between two progress messages;
       an interval below 1 disables the progress messages

    A final message with the total number of inserted documents is
    always printed.
    """
    verbose, interval = v[0], v[1]
    # Guard against the default interval of 0, which would otherwise
    # raise ZeroDivisionError as soon as verbose is switched on.
    show_progress = verbose and interval > 0

    client = pymongo.MongoClient(mongo[0], mongo[1])
    try:
        collection = client[db_name][col_name]
        if overwrite:
            collection.drop()
        c = 0
        for document in iter_json(filename):
            collection.insert_one(document)
            # Count before reporting so the message states how many
            # documents have actually been inserted.
            c += 1
            if show_progress and c % interval == 0:
                # The double space reproduces the output of the original
                # `print c, " documents added."` statement; the function
                # form parses on both Python 2 and Python 3.
                print("{0}  documents added.".format(c))
        print("{0}  documents added.".format(c))
    finally:
        # Release the connection even if parsing or inserting fails.
        client.close()
if __name__ == "__main__":
    # Only the argument lookup is guarded, so an IndexError raised while
    # importing is not misreported as a command-line usage error.
    try:
        db_name = sys.argv[1]
        col_name = sys.argv[2]
        filename = sys.argv[3]
    except IndexError:
        # Single-argument print() parses on both Python 2 and Python 3.
        print("usage of this program:\n\tpython import.py [database] [collection] [filename]")
        # Signal the usage error to the calling shell.
        sys.exit(1)
    mongo_import(db_name, col_name, filename, overwrite = True, v = (True, 10000))