Skip to content

Commit

Permalink
Update scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
Vitaliy Zarubin committed Dec 18, 2024
1 parent 6cf1bf0 commit ad11167
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 46 deletions.
49 changes: 17 additions & 32 deletions scripts/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,63 +22,48 @@

# Build and print a small statistics table for the dataset.
# NOTE(review): this span was diff residue (old and new lines of commit
# ad11167 interleaved); reconstructed here as the post-commit version.
# `result` (a JSON string) and `get_path_project()` (repo root Path) are
# defined above this excerpt — assumed, confirm against the full file.
data = json.loads(result)
dataset = data['dataset']

# Count lines across every dataset file in the project tree.
lines = 0
for file in get_path_project().rglob('*.pkl'):
    with open(file, 'r') as fp:
        lines += len(fp.readlines())

# Count distinct authors. Authors are compared by an MD5 of the 'a'
# field; a set gives O(1) membership instead of a linear list scan.
authors = set()
for item in dataset:
    hash_name = hashlib.md5('{}'.format(item['a']).encode('utf-8')).hexdigest()
    authors.add(hash_name)

# Table data: header row plus stat rows; an empty row renders as a
# horizontal separator.
headers = ['Name', 'Count']
states = [
    ['Items', len(dataset)],
    [],
    ['Authors', len(authors)],
    ['Lines of code', lines],
]

# print table headers
for col in headers:
    print(col.ljust(16), end='')
print()
for i, col in enumerate(headers):
    print('+--------------', end=('+' if i == len(headers) - 1 else '-'))
print()

# print table rows (an empty row becomes a separator line)
for row in states:
    if not row:
        for _ in headers:
            print('---------------', end='-')
        print()
    else:
        for col in row:
            print(str(col).ljust(16), end='')
        print()

# closing rule under the table
for _ in headers:
    print('---------------', end='-')
print()
19 changes: 5 additions & 14 deletions scripts/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,15 @@ def validate_body_size(items) -> bool:
# Run every validator over the flattened dataset and collect pass/fail
# booleans; any False marks the run as failed.
# NOTE(review): this span was diff residue (old and new lines of commit
# ad11167 interleaved); reconstructed as the post-commit version. The
# commit also left `validate_body_size(dataset)` appended twice — the
# redundant second run is dropped here. `result`,
# `validate_duplicate_title` and `validate_body_size` are defined above
# this excerpt.
results = []
data = json.loads(result)
dataset = data['dataset']

print('Validate duplicate title...')
results.append(validate_duplicate_title(dataset))

print('Validate size body...')
results.append(validate_body_size(dataset))

# Report failure when any validator returned False.
if not all(results):
    print('Result: validation errors found.')
Expand Down

0 comments on commit ad11167

Please sign in to comment.