Skip to content

Commit

Permalink
Update scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
Vitaliy Zarubin committed Dec 18, 2024
1 parent 6cf1bf0 commit ad11167
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 46 deletions.
49 changes: 17 additions & 32 deletions scripts/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,63 +22,48 @@

# Build and print a small statistics table for the dataset.
# NOTE(review): this span was diff residue (old and new lines of commit
# ad11167 interleaved); reconstructed here as the post-commit version.
# `result` (a JSON string) and `get_path_project()` (repo root Path) are
# defined above this excerpt — assumed, confirm against the full file.
data = json.loads(result)
dataset = data['dataset']

# Count lines across every dataset file in the project tree.
lines = 0
for file in get_path_project().rglob('*.pkl'):
    with open(file, 'r') as fp:
        lines += len(fp.readlines())

# Count distinct authors. Authors are compared by an MD5 of the 'a'
# field; a set gives O(1) membership instead of a linear list scan.
authors = set()
for item in dataset:
    hash_name = hashlib.md5('{}'.format(item['a']).encode('utf-8')).hexdigest()
    authors.add(hash_name)

# Table data: header row plus stat rows; an empty row renders as a
# horizontal separator.
headers = ['Name', 'Count']
states = [
    ['Items', len(dataset)],
    [],
    ['Authors', len(authors)],
    ['Lines of code', lines],
]

# print table headers
for col in headers:
    print(col.ljust(16), end='')
print()
for i, col in enumerate(headers):
    print('+--------------', end=('+' if i == len(headers) - 1 else '-'))
print()

# print table rows (an empty row becomes a separator line)
for row in states:
    if not row:
        for _ in headers:
            print('---------------', end='-')
        print()
    else:
        for col in row:
            print(str(col).ljust(16), end='')
        print()

# closing rule under the table
for _ in headers:
    print('---------------', end='-')
print()
19 changes: 5 additions & 14 deletions scripts/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,15 @@ def validate_body_size(items) -> bool:
# Run every validator over the flattened dataset and collect pass/fail
# booleans; any False marks the run as failed.
# NOTE(review): this span was diff residue (old and new lines of commit
# ad11167 interleaved); reconstructed as the post-commit version. The
# commit also left `validate_body_size(dataset)` appended twice — the
# redundant second run is dropped here. `result`,
# `validate_duplicate_title` and `validate_body_size` are defined above
# this excerpt.
results = []
data = json.loads(result)
dataset = data['dataset']

print('Validate duplicate title...')
results.append(validate_duplicate_title(dataset))

print('Validate size body...')
results.append(validate_body_size(dataset))

# Report failure when any validator returned False.
if not all(results):
    print('Result: validation errors found.')
Expand Down

0 comments on commit ad11167

Please sign in to comment.