14
14
import time
15
15
import warnings
16
16
17
+ import clickhouse_driver as clickhouse
18
+ import six
17
19
from tokyocabinet import hash as tch
18
20
19
- import clickhouse_driver as clickhouse
20
21
21
22
22
23
__version__ = '1.3.3'
@@ -450,7 +451,7 @@ def all(cls):
450
451
all file names.
451
452
452
453
Yields:
453
- ( Project): project
454
+ Project: a project
454
455
"""
455
456
if not cls ._keys_registry_dtype :
456
457
raise NotImplemented
@@ -640,7 +641,6 @@ def commits(self):
640
641
return (Commit (bin_sha ) for bin_sha in self .commit_shas )
641
642
642
643
643
-
644
644
class Tree (GitObject ):
645
645
""" A representation of git tree object, basically - a directory.
646
646
@@ -731,7 +731,8 @@ def traverse(self):
731
731
This will generate 3-tuples of the same format as direct tree
732
732
iteration, but will recursively include subtrees content.
733
733
734
- :return: generator of (mode, filename, blob/tree sha)
734
+ Yields:
735
+ Tuple[str, str, str]: (mode, filename, blob/tree sha)
735
736
736
737
>>> c = Commit("1e971a073f40d74a1e72e07c682e1cba0bae159b")
737
738
>>> len(list(c.tree.traverse()))
@@ -1478,8 +1479,7 @@ def query(self, query_str):
1478
1479
return self .client .execute (query_str )
1479
1480
1480
1481
def query_iter (self , query_str ):
1481
- row_iter = self .client .execute_iter (query_str )
1482
- for row in row_iter :
1482
+ for row in self .client .execute_iter (query_str ):
1483
1483
yield row
1484
1484
1485
1485
def query_select (self , s_col , s_from , s_start , s_end ):
@@ -1492,12 +1492,12 @@ def query_select_iter(self, s_col, s_from, s_start, s_end):
1492
1492
# iterative query
1493
1493
s_where = self .__where_condition (s_start , s_end )
1494
1494
query_str = 'select {} from {} where {}' .format (s_col , s_from , s_where )
1495
- row_iter = self .client .execute_iter (query_str )
1496
- for row in row_iter :
1495
+ for row in self .client .execute_iter (query_str ):
1497
1496
yield row
1498
1497
1499
1498
def __where_condition (self , start , end ):
1500
- # checks if start and end date or time is valid and build the where clause
1499
+ # checks if start and end date or time is valid and build the where
1500
+ # clause
1501
1501
dt = 'time'
1502
1502
if not self .__check_time (start , end ):
1503
1503
dt = 'date'
@@ -1506,27 +1506,28 @@ def __where_condition(self, start, end):
1506
1506
1507
1507
if end is None :
1508
1508
return '{}={}' .format (dt , start )
1509
- else :
1510
- return '{}>={} AND {}<={}' .format (dt , start , dt , end )
1509
+ return '{}>={} AND {}<={}' .format (dt , start , dt , end )
1511
1510
1512
1511
def __check_time (self , start , end ):
1513
- # make sure start and end are of the same type and must be either strings or ints
1512
+ # make sure start and end are of the same type and must be either
1513
+ # strings or ints
1514
1514
if start is None :
1515
1515
raise ValueError ('start time cannot be None' )
1516
- elif not isinstance (start , int ) and not isinstance ( start , basestring ):
1516
+ if not isinstance (start , ( int , six . string_types ) ):
1517
1517
raise ValueError ('start time must be either int or string' )
1518
- elif end is not None and not isinstance (end , int ) and not isinstance ( end , basestring ):
1518
+ if end is not None and not isinstance (end , ( int , six . string_types ) ):
1519
1519
raise ValueError ('end time must be either int or string' )
1520
- elif end is not None and type (start ) is not type (end ):
1520
+ if end is not None and type (start ) is not type (end ):
1521
1521
raise ValueError ('start and end must be of the same type' )
1522
- return ( True if isinstance (start , int ) else False )
1522
+ return isinstance (start , int )
1523
1523
1524
1524
1525
1525
class Time_commit_info (Clickhouse_DB ):
1526
- """ Time_commit_info class is initialized with table name and database host name
1527
- the default table for commits is commits_all, and the default host is localhost
1528
- No connection is established before the query is made.
1529
- The 'commits_all' table description is the following:
1526
+ """ Time_commit_info class is initialized with table name and database host
1527
+ name the default table for commits is commits_all, and the default host is
1528
+ localhost No connection is established before the query is made.
1529
+
1530
+ The 'commits_all' table description is the following:
1530
1531
|__name___|______type_______|
1531
1532
| sha1 | FixedString(20) |
1532
1533
| time | Int32 |
@@ -1547,10 +1548,9 @@ def commit_counts(self, start, end=None):
1547
1548
>>> t.commit_counts(1568656268)
1548
1549
8
1549
1550
"""
1550
- rows = self .query_select ('count(*)' , self .tb_name , start , end )
1551
- return rows [0 ][0 ]
1551
+ return self .query_select ('count(*)' , self .tb_name , start , end )[0 ][0 ]
1552
1552
1553
- def commits_iter (self , start , end = None ):
1553
+ def commits (self , start , end = None ):
1554
1554
""" return a generator of Commit instances within a given date and time
1555
1555
>>> t = Time_commit_info()
1556
1556
>>> commits = t.commits_iter(1568656268)
@@ -1560,34 +1560,25 @@ def commits_iter(self, start, end=None):
1560
1560
>>> c.parent_shas
1561
1561
('9c4cc4f6f8040ed98388c7dedeb683469f7210f5',)
1562
1562
"""
1563
- row_iter = self .query_select_iter ('lower(hex(sha1))' , self .tb_name , start , end )
1564
- for row in row_iter :
1565
- yield Commit (row [0 ])
1563
+ for sha in self .commits_shas (start , end ):
1564
+ yield Commit (sha )
1566
1565
1567
1566
def commits_shas (self , start , end = None ):
1568
- """ return a list of shas within the given time and date
1569
- >>> t = Time_commit_info()
1570
- >>> shas = t.commits_shas(1568656268)
1571
- >>> type(shas)
1572
- <type 'list'>
1573
- """
1574
- rows = self .query_select ('lower(hex(sha1))' , self .tb_name , start , end )
1575
- return [row [0 ] for row in rows ]
1576
-
1577
- def commits_shas_iter (self , start , end = None ):
1578
1567
""" return a generator of all sha1 within the given time and date
1579
1568
>>> t = Time_commit_info()
1580
- >>> for sha1 in t.commits_shas_iter (1568656268):
1569
+ >>> for sha1 in t.commits_shas (1568656268):
1581
1570
... print(sha1)
1582
1571
"""
1583
- row_iter = self .query_select_iter ('lower(hex(sha1))' , self . tb_name , start , end )
1584
- for row in row_iter :
1572
+ for row in self .query_select_iter (
1573
+ 'lower(hex(sha1))' , self . tb_name , start , end ) :
1585
1574
yield row [0 ]
1586
-
1575
+
1576
+
1587
1577
class Time_project_info (Clickhouse_DB ):
1588
- """ Time_project_info class is initialized with table name and database host name
1589
- The default table name for projects is projects_all, and the default database name is localhost
1590
- This class contains methods to query for project data
1578
+ """ Time_project_info class is initialized with table name and database host
1579
+ name. The default table name for projects is projects_all, and the default
1580
+ database name is localhost. This class contains methods to query for project
1581
+ data.
1591
1582
The 'projects_all' table descrption is the following:
1592
1583
|___name___|______type_______|
1593
1584
| sha1 | FixedString(20) |
@@ -1617,10 +1608,8 @@ def get_values_iter(self, cols, start, end):
1617
1608
...
1618
1609
"""
1619
1610
cols = self .__wrap_cols (cols )
1620
- rows_iter = self .query_select_iter (', ' .join (cols ), self .tb_name , start , end )
1621
- for row in rows_iter :
1622
- yield row
1623
-
1611
+ return self .query_select_iter (', ' .join (cols ), self .tb_name , start , end )
1612
+
1624
1613
def project_timeline (self , cols , repo ):
1625
1614
""" return a generator for all rows given a repo name (ordered by time)
1626
1615
>>> rows = p.project_timeline(['time','repo'], 'mrtrevanderson_CECS_424')
@@ -1635,13 +1624,12 @@ def project_timeline(self, cols, repo):
1635
1624
cols = self .__wrap_cols (cols )
1636
1625
query_str = 'SELECT {} FROM {} WHERE repo=\' {}\' ORDER BY time' \
1637
1626
.format (', ' .join (cols ), self .tb_name , repo )
1638
- rows_iter = self .query_iter (query_str )
1639
- for row in rows_iter :
1640
- yield row
1627
+ return self .query_iter (query_str )
1641
1628
1642
1629
def author_timeline (self , cols , author ):
1643
1630
""" return a generator for all rows given an author (ordered by time)
1644
- >>> rows = p.author_timeline(['time', 'repo'], 'Andrew Gacek <[email protected] >')
1631
+ >>> rows = p.author_timeline(
1632
+ ... ['time', 'repo'], 'Andrew Gacek <[email protected] >')
1645
1633
>>> for row in rows:
1646
1634
... print(row)
1647
1635
...
@@ -1653,13 +1641,10 @@ def author_timeline(self, cols, author):
1653
1641
cols = self .__wrap_cols (cols )
1654
1642
query_str = 'SELECT {} FROM {} WHERE author=\' {}\' ORDER BY time' \
1655
1643
.format (', ' .join (cols ), self .tb_name , author )
1656
- rows_iter = self .query_iter (query_str )
1657
- for row in rows_iter :
1658
- yield row
1644
+ return self .query_iter (query_str )
1659
1645
1660
1646
def __wrap_cols (self , cols ):
1661
- """ wraps cols to select before querying
1662
- """
1647
+ """ wraps cols to select before querying """
1663
1648
for i in range (len (cols )):
1664
1649
if cols [i ] == 'sha1' or cols [i ] == 'blob' :
1665
1650
cols [i ] = 'lower(hex({}))' .format (cols [i ])
0 commit comments