Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Translation Delivery Time CLI

This command line application calculates the moving average delivery time of translation events based on a specified window size.

## Requirements

- Python 3.7.x or higher

## Installation

1. Clone or download the repository to your local machine.
2. Ensure you have Python installed.

## Usage

Run the application from the command line with the following command:
python unbabel_cli.py --input_file events.json --window_size 10


Replace `events.json` with the path to your input JSON file containing translation events and `10` with your desired window size.

## Testing

To test the application, you can run the unit tests provided in the `tests_unbabel_cli.py` file. Use the following command:
python -m unittest tests_unbabel_cli.py

DISCLAIMER: Unfortunately, the output is not as expected; the code is what I could do with the time I had.
86 changes: 0 additions & 86 deletions README.md

This file was deleted.

3 changes: 3 additions & 0 deletions events.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"timestamp": "2018-12-26 18:11:08.509654","translation_id": "5aa5b2f39f7254a75aa5","source_language": "en","target_language": "fr","client_name": "airliberty","event_name": "translation_delivered","nr_words": 30, "duration": 20}
{"timestamp": "2018-12-26 18:15:19.903159","translation_id": "5aa5b2f39f7254a75aa4","source_language": "en","target_language": "fr","client_name": "airliberty","event_name": "translation_delivered","nr_words": 30, "duration": 31}
{"timestamp": "2018-12-26 18:23:19.903159","translation_id": "5aa5b2f39f7254a75bb3","source_language": "en","target_language": "fr","client_name": "taxi-eats","event_name": "translation_delivered","nr_words": 100, "duration": 54}
14 changes: 14 additions & 0 deletions output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{"date": "2018-12-26 18:11:00", "average_delivery_time": 0}
{"date": "2018-12-26 18:12:00", "average_delivery_time": 20.0}
{"date": "2018-12-26 18:13:00", "average_delivery_time": 20.0}
{"date": "2018-12-26 18:14:00", "average_delivery_time": 20.0}
{"date": "2018-12-26 18:15:00", "average_delivery_time": 20.0}
{"date": "2018-12-26 18:16:00", "average_delivery_time": 25.5}
{"date": "2018-12-26 18:17:00", "average_delivery_time": 25.5}
{"date": "2018-12-26 18:18:00", "average_delivery_time": 25.5}
{"date": "2018-12-26 18:19:00", "average_delivery_time": 25.5}
{"date": "2018-12-26 18:20:00", "average_delivery_time": 25.5}
{"date": "2018-12-26 18:21:00", "average_delivery_time": 31.0}
{"date": "2018-12-26 18:22:00", "average_delivery_time": 31.0}
{"date": "2018-12-26 18:23:00", "average_delivery_time": 31.0}
{"date": "2018-12-26 18:24:00", "average_delivery_time": 42.5}
46 changes: 46 additions & 0 deletions tests_unbabel_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import unittest
import json
from datetime import datetime
from unbabel_cli import parse_event, moving_average

class TestMovingAverages(unittest.TestCase):
    """Unit tests for ``parse_event`` and ``moving_average``."""

    def setUp(self):
        """Build three sample events in both raw and parsed form."""
        # Each sample: (raw timestamp string, parsed datetime, duration).
        samples = [
            ("2018-12-26 18:11:08.509654", datetime(2018, 12, 26, 18, 11, 8, 509654), 20),
            ("2018-12-26 18:15:19.903159", datetime(2018, 12, 26, 18, 15, 19, 903159), 31),
            ("2018-12-26 18:23:19.903159", datetime(2018, 12, 26, 18, 23, 19, 903159), 54),
        ]
        self.events = [
            {"timestamp": raw, "duration": duration}
            for raw, _, duration in samples
        ]
        self.parsed_events = [
            {"timestamp": moment, "duration": duration}
            for _, moment, duration in samples
        ]

    def test_parse_event(self):
        """Every raw event parses to its expected datetime/duration dict."""
        parsed = list(map(parse_event, self.events))
        self.assertEqual(parsed, self.parsed_events)

    def test_moving_average(self):
        """A 10-minute window yields one averaged entry per minute."""
        results = moving_average(self.parsed_events, 10)
        # Expected averages for minutes 18:11 through 18:24, in order.
        averages = [
            0,
            20, 20, 20, 20,
            25.5, 25.5, 25.5, 25.5, 25.5, 25.5,
            31, 31,
            42.5,
        ]
        expected_results = [
            {"date": f"2018-12-26 18:{minute}:00", "average_delivery_time": average}
            for minute, average in enumerate(averages, start=11)
        ]
        self.assertEqual(results, expected_results)

# Add other tests here if needed

# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
88 changes: 88 additions & 0 deletions unbabel_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import json
import argparse
from datetime import datetime, timedelta

# Function to parse each event from JSON format
def parse_event(event):
    """Convert a raw event dict into the minimal form used for averaging.

    Args:
        event: Mapping with a ``'timestamp'`` string formatted as
            ``YYYY-MM-DD HH:MM:SS.ffffff`` and a ``'duration'`` value.

    Returns:
        A dict with ``'timestamp'`` as a ``datetime`` and ``'duration'``
        copied unchanged; any other keys of ``event`` are dropped.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S.%f'
    parsed_timestamp = datetime.strptime(event['timestamp'], timestamp_format)
    return dict(timestamp=parsed_timestamp, duration=event['duration'])

# Function to calculate the moving average delivery time
def moving_average(events, window_size):
    """Compute the per-minute moving average of delivery durations.

    One entry is produced for every whole minute, starting at the minute of
    the earliest event and ending at the minute after the latest event.
    Each entry averages the ``duration`` of all events whose timestamp lies
    in the preceding ``window_size`` minutes, i.e. in the half-open interval
    ``(minute - window_size, minute]``.

    Args:
        events: Iterable of dicts holding a ``'timestamp'`` (``datetime``)
            and a ``'duration'`` (number). Order does not matter.
        window_size: Length of the averaging window, in minutes.

    Returns:
        A list of dicts ``{'date': 'YYYY-MM-DD HH:MM:SS',
        'average_delivery_time': value}`` in chronological order. The value
        is ``0`` for minutes whose window contains no event. An empty list
        is returned when ``events`` is empty.
    """
    # Sort events based on timestamp
    events = sorted(events, key=lambda x: x['timestamp'])
    # Nothing to average: avoid indexing into an empty list below.
    if not events:
        return []

    # Truncate to whole minutes; the extra minute at the end makes sure the
    # last event (which may fall mid-minute) is reported at least once.
    start_time = events[0]['timestamp'].replace(second=0, microsecond=0)
    end_time = events[-1]['timestamp'].replace(second=0, microsecond=0) + timedelta(minutes=1)
    window = timedelta(minutes=window_size)
    one_minute = timedelta(minutes=1)
    result = []

    # Loop through each minute in the event stream
    current_time = start_time
    while current_time <= end_time:
        # The window spans the full `window_size` minutes before
        # current_time. The lower bound is exclusive so the window is
        # exactly `window_size` minutes long.
        window_start = current_time - window
        window_events = [
            event['duration']
            for event in events
            if window_start < event['timestamp'] <= current_time
        ]
        if window_events:
            avg_duration = sum(window_events) / len(window_events)
        else:
            avg_duration = 0
        result.append({
            'date': current_time.strftime('%Y-%m-%d %H:%M:%S'),
            'average_delivery_time': avg_duration,
        })
        current_time += one_minute

    return result



# Main function to read input, calculate moving average, and write output
def main(input_file, window_size, output_file='output.json'):
    """Read events, compute the moving average, and write the results.

    The input is read as JSON Lines (one JSON object per line); blank lines
    are ignored. Results are written to ``output_file``, one JSON object per
    line. Problems with the input are reported on standard output as
    ``Error: ...`` messages instead of being raised.

    Args:
        input_file: Path to the JSON Lines file containing the events.
        window_size: Size of the moving average window in minutes; must be
            a positive integer.
        output_file: Path of the file to write. Defaults to ``output.json``
            in the current working directory.
    """
    try:
        # Read events from the input file
        events = []
        with open(input_file, 'r', encoding='utf-8') as file:
            for line_number, line in enumerate(file, start=1):
                line = line.strip()
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # rather than rejecting the whole file as invalid JSON.
                if not line:
                    continue
                try:
                    # Parse each line as JSON and append to events list
                    event = json.loads(line)
                    events.append(parse_event(event))
                except json.JSONDecodeError as e:
                    raise ValueError(f"Invalid JSON in input file: {e}") from e
                except (KeyError, TypeError) as e:
                    # Missing 'timestamp'/'duration', or a line that is not
                    # a JSON object with a string timestamp.
                    raise ValueError(
                        f"Invalid event on line {line_number}: {e!r}"
                    ) from e

        # Check if events list is empty
        if not events:
            raise ValueError("Input file is empty")

        # Check if window size is valid
        if not isinstance(window_size, int) or window_size <= 0:
            raise ValueError("Window size must be a positive integer")

        # Calculate moving average delivery time
        averages = moving_average(events, window_size)

        # Write output to file, one JSON object per line
        with open(output_file, 'w', encoding='utf-8') as file:
            file.writelines(json.dumps(avg) + '\n' for avg in averages)

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found")
    except ValueError as e:
        # Also covers malformed timestamps raised by parse_event.
        print(f"Error: {e}")


# Command line argument parsing
if __name__ == '__main__':
    # Both options are mandatory; argparse converts --window_size to int.
    cli = argparse.ArgumentParser(description='Process translation delivery events.')
    cli.add_argument(
        '--input_file',
        type=str,
        required=True,
        help='Path to the input JSON file with events.',
    )
    cli.add_argument(
        '--window_size',
        type=int,
        required=True,
        help='Size of the moving average window in minutes.',
    )

    options = cli.parse_args()
    # Hand the parsed options over to the application entry point.
    main(options.input_file, options.window_size)