Skip to content

Commit 3b7d961

Browse files
committed
chore: refactoring Time_project_info
1 parent b35706c commit 3b7d961

File tree

2 files changed

+45
-59
lines changed

2 files changed

+45
-59
lines changed

oscar.py

+41-56
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@
1414
import time
1515
import warnings
1616

17+
import clickhouse_driver as clickhouse
18+
import six
1719
from tokyocabinet import hash as tch
1820

19-
import clickhouse_driver as clickhouse
2021

2122

2223
__version__ = '1.3.3'
@@ -450,7 +451,7 @@ def all(cls):
450451
all file names.
451452
452453
Yields:
453-
(Project): project
454+
Project: a project
454455
"""
455456
if not cls._keys_registry_dtype:
456457
raise NotImplemented
@@ -640,7 +641,6 @@ def commits(self):
640641
return (Commit(bin_sha) for bin_sha in self.commit_shas)
641642

642643

643-
644644
class Tree(GitObject):
645645
""" A representation of git tree object, basically - a directory.
646646
@@ -731,7 +731,8 @@ def traverse(self):
731731
This will generate 3-tuples of the same format as direct tree
732732
iteration, but will recursively include subtrees content.
733733
734-
:return: generator of (mode, filename, blob/tree sha)
734+
Yields:
735+
Tuple[str, str, str]: (mode, filename, blob/tree sha)
735736
736737
>>> c = Commit("1e971a073f40d74a1e72e07c682e1cba0bae159b")
737738
>>> len(list(c.tree.traverse()))
@@ -1478,8 +1479,7 @@ def query(self, query_str):
14781479
return self.client.execute(query_str)
14791480

14801481
def query_iter(self, query_str):
1481-
row_iter = self.client.execute_iter(query_str)
1482-
for row in row_iter:
1482+
for row in self.client.execute_iter(query_str):
14831483
yield row
14841484

14851485
def query_select(self, s_col, s_from, s_start, s_end):
@@ -1492,12 +1492,12 @@ def query_select_iter(self, s_col, s_from, s_start, s_end):
14921492
# iterative query
14931493
s_where = self.__where_condition(s_start, s_end)
14941494
query_str = 'select {} from {} where {}'.format(s_col, s_from, s_where)
1495-
row_iter = self.client.execute_iter(query_str)
1496-
for row in row_iter:
1495+
for row in self.client.execute_iter(query_str):
14971496
yield row
14981497

14991498
def __where_condition(self, start, end):
1500-
# checks if start and end date or time is valid and build the where clause
1499+
# checks if start and end date or time are valid and builds the where
1500+
# clause
15011501
dt = 'time'
15021502
if not self.__check_time(start, end):
15031503
dt = 'date'
@@ -1506,27 +1506,28 @@ def __where_condition(self, start, end):
15061506

15071507
if end is None:
15081508
return '{}={}'.format(dt, start)
1509-
else:
1510-
return '{}>={} AND {}<={}'.format(dt, start, dt, end)
1509+
return '{}>={} AND {}<={}'.format(dt, start, dt, end)
15111510

15121511
def __check_time(self, start, end):
1513-
# make sure start and end are of the same type and must be either strings or ints
1512+
# make sure start and end are of the same type and must be either
1513+
# strings or ints
15141514
if start is None:
15151515
raise ValueError('start time cannot be None')
1516-
elif not isinstance(start, int) and not isinstance(start, basestring):
1516+
if not isinstance(start, (int, six.string_types)):
15171517
raise ValueError('start time must be either int or string')
1518-
elif end is not None and not isinstance(end, int) and not isinstance(end, basestring):
1518+
if end is not None and not isinstance(end, (int, six.string_types)):
15191519
raise ValueError('end time must be either int or string')
1520-
elif end is not None and type(start) is not type(end):
1520+
if end is not None and type(start) is not type(end):
15211521
raise ValueError('start and end must be of the same type')
1522-
return (True if isinstance(start, int) else False)
1522+
return isinstance(start, int)
15231523

15241524

15251525
class Time_commit_info(Clickhouse_DB):
1526-
""" Time_commit_info class is initialized with table name and database host name
1527-
the default table for commits is commits_all, and the default host is localhost
1528-
No connection is established before the query is made.
1529-
The 'commits_all' table description is the following:
1526+
""" Time_commit_info class is initialized with table name and database host
1527+
name. The default table for commits is commits_all, and the default host is
1528+
localhost. No connection is established before the query is made.
1529+
1530+
The 'commits_all' table description is the following:
15301531
|__name___|______type_______|
15311532
| sha1 | FixedString(20) |
15321533
| time | Int32 |
@@ -1547,10 +1548,9 @@ def commit_counts(self, start, end=None):
15471548
>>> t.commit_counts(1568656268)
15481549
8
15491550
"""
1550-
rows = self.query_select('count(*)', self.tb_name, start, end)
1551-
return rows[0][0]
1551+
return self.query_select('count(*)', self.tb_name, start, end)[0][0]
15521552

1553-
def commits_iter(self, start, end=None):
1553+
def commits(self, start, end=None):
15541554
""" return a generator of Commit instances within a given date and time
15551555
>>> t = Time_commit_info()
15561556
>>> commits = t.commits(1568656268)
@@ -1560,34 +1560,25 @@ def commits_iter(self, start, end=None):
15601560
>>> c.parent_shas
15611561
('9c4cc4f6f8040ed98388c7dedeb683469f7210f5',)
15621562
"""
1563-
row_iter = self.query_select_iter('lower(hex(sha1))', self.tb_name, start, end)
1564-
for row in row_iter:
1565-
yield Commit(row[0])
1563+
for sha in self.commits_shas(start, end):
1564+
yield Commit(sha)
15661565

15671566
def commits_shas(self, start, end=None):
1568-
""" return a list of shas within the given time and date
1569-
>>> t = Time_commit_info()
1570-
>>> shas = t.commits_shas(1568656268)
1571-
>>> type(shas)
1572-
<type 'list'>
1573-
"""
1574-
rows = self.query_select('lower(hex(sha1))', self.tb_name, start, end)
1575-
return [row[0] for row in rows]
1576-
1577-
def commits_shas_iter(self, start, end=None):
15781567
""" return a generator of all sha1 within the given time and date
15791568
>>> t = Time_commit_info()
1580-
>>> for sha1 in t.commits_shas_iter(1568656268):
1569+
>>> for sha1 in t.commits_shas(1568656268):
15811570
... print(sha1)
15821571
"""
1583-
row_iter = self.query_select_iter('lower(hex(sha1))', self.tb_name, start, end)
1584-
for row in row_iter:
1572+
for row in self.query_select_iter(
1573+
'lower(hex(sha1))', self.tb_name, start, end):
15851574
yield row[0]
1586-
1575+
1576+
15871577
class Time_project_info(Clickhouse_DB):
1588-
""" Time_project_info class is initialized with table name and database host name
1589-
The default table name for projects is projects_all, and the default database name is localhost
1590-
This class contains methods to query for project data
1578+
""" Time_project_info class is initialized with table name and database host
1579+
name. The default table name for projects is projects_all, and the default
1580+
database name is localhost. This class contains methods to query for project
1581+
data.
15911582
The 'projects_all' table description is the following:
15921583
|___name___|______type_______|
15931584
| sha1 | FixedString(20) |
@@ -1617,10 +1608,8 @@ def get_values_iter(self, cols, start, end):
16171608
...
16181609
"""
16191610
cols = self.__wrap_cols(cols)
1620-
rows_iter = self.query_select_iter(', '.join(cols), self.tb_name, start, end)
1621-
for row in rows_iter:
1622-
yield row
1623-
1611+
return self.query_select_iter(', '.join(cols), self.tb_name, start, end)
1612+
16241613
def project_timeline(self, cols, repo):
16251614
""" return a generator for all rows given a repo name (ordered by time)
16261615
>>> rows = p.project_timeline(['time','repo'], 'mrtrevanderson_CECS_424')
@@ -1635,13 +1624,12 @@ def project_timeline(self, cols, repo):
16351624
cols = self.__wrap_cols(cols)
16361625
query_str = 'SELECT {} FROM {} WHERE repo=\'{}\' ORDER BY time'\
16371626
.format(', '.join(cols), self.tb_name, repo)
1638-
rows_iter = self.query_iter(query_str)
1639-
for row in rows_iter:
1640-
yield row
1627+
return self.query_iter(query_str)
16411628

16421629
def author_timeline(self, cols, author):
16431630
""" return a generator for all rows given an author (ordered by time)
1644-
>>> rows = p.author_timeline(['time', 'repo'], 'Andrew Gacek <[email protected]>')
1631+
>>> rows = p.author_timeline(
1632+
... ['time', 'repo'], 'Andrew Gacek <[email protected]>')
16451633
>>> for row in rows:
16461634
... print(row)
16471635
...
@@ -1653,13 +1641,10 @@ def author_timeline(self, cols, author):
16531641
cols = self.__wrap_cols(cols)
16541642
query_str = 'SELECT {} FROM {} WHERE author=\'{}\' ORDER BY time'\
16551643
.format(', '.join(cols), self.tb_name, author)
1656-
rows_iter = self.query_iter(query_str)
1657-
for row in rows_iter:
1658-
yield row
1644+
return self.query_iter(query_str)
16591645

16601646
def __wrap_cols(self, cols):
1661-
""" wraps cols to select before querying
1662-
"""
1647+
""" wraps cols to select before querying """
16631648
for i in range(len(cols)):
16641649
if cols[i] == 'sha1' or cols[i] == 'blob':
16651650
cols[i] = 'lower(hex({}))'.format(cols[i])

requirements.txt

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
pygit2
1+
clickhouse-driver
2+
fnvhash
23
gitdb2==2.0.6
4+
pygit2
35
python-lzf
6+
six
47
tokyocabinet
5-
fnvhash
6-
clickhouse-driver

0 commit comments

Comments
 (0)