Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Error handling #5

Merged
merged 1 commit into from
Mar 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 60 additions & 47 deletions bionicpython/bionicpython.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,56 +35,71 @@ def process_word(word, ratio):
def process_document(doc_path, ratio):
# Check if the file path is for a .docx file
if doc_path.endswith('.docx'):
print("already in docx format")
print("Already in docx format")
elif doc_path.endswith('.pdf'):
docx_path = doc_path.replace('.pdf', '.docx')
# Run the conversion script as a subprocess with the same Python interpreter
subprocess.run([sys.executable, 'converter.py', '--pdf', doc_path, '--docx', docx_path])
# Replace '.pdf' with '.docx' in the file path
doc_path = docx_path
try:
# Run the conversion script as a subprocess
subprocess.run([sys.executable, 'converter.py', '--pdf', doc_path, '--docx', docx_path])
# Replace '.pdf' with '.docx' in the file path
doc_path = docx_path
except subprocess.CalledProcessError as e:
print("Error converting PDF:", e)
sys.exit(1)
else:
print("files of this format are not supported yet")
print("please use either .pdf or .docx files")
print("Files of this format are not supported yet")
print("Please use either .pdf or .docx files")
sys.exit()

# Load the spacy model for word recognition
nlp = spacy.load('en_core_web_sm')

# Open the .docx file
word_doc = Document(doc_path)

for paragraph in word_doc.paragraphs:
for run in paragraph.runs:
# Skip if the run is already bold
if run.bold:
continue

# Split the run text into words
# words = run.text.split()
# words = [word + ' ' for word in run.text.split()]
# words = re.findall(r'\s*\S+', run.text)
# words = re.findall(r'(?:^|\s)\S+', run.text)
words = run.text.split(' ')
words = [' ' + word if i != 0 else word for i, word in enumerate(words)]

# Process each word
new_runs = []
for word in words:
# Use spacy to recognize the words
doc = nlp(word)
for token in doc:
# Bolden a ratio of the characters in the word
runs = process_word(token.text, ratio)
new_runs.extend(runs)

# Clear the original run
run.text = ''

# Add new runs with the appropriate formatting
for text, is_bold in new_runs:
new_run = paragraph.add_run(text)
new_run.bold = is_bold

# Load the spacy model for word recognition (wrap in try-except)
try:
nlp = spacy.load('en_core_web_sm')
except OSError as e:
print("Error loading spaCy model:", e)
sys.exit(1)

try:
# Open the .docx file
word_doc = Document(doc_path)

for paragraph in word_doc.paragraphs:
for run in paragraph.runs:
# Skip if the run is already bold
if run.bold:
continue

# Split the run text into words
words = run.text.split(' ')
words = [' ' + word if i != 0 else word for i, word in enumerate(words)]

# Process each word
new_runs = []
for word in words:
# Use spacy to recognize the words
doc = nlp(word)
for token in doc:
# Bolden a ratio of the characters in the word
runs = process_word(token.text, ratio)
new_runs.extend(runs)

# Clear the original run
run.text = ''

# Add new runs with the appropriate formatting
for text, is_bold in new_runs:
new_run = paragraph.add_run(text)
new_run.bold = is_bold

# Save the document (wrap in try-except)
try:
word_doc.save(output_path)
except PermissionError as e:
print("Error saving document:", e)
sys.exit(1)

except Exception as e: # Catch any other unexpected errors
print("Unexpected error processing document:", e)
sys.exit(1)

# Get the directory and filename from the input path
dir_name, file_name = os.path.split(doc_path)
Expand All @@ -96,8 +111,6 @@ def process_document(doc_path, ratio):
output_path = os.path.splitext(doc_path)[0] + '_modified.docx'
print(output_path)

word_doc.save(output_path)


def main():
parser = argparse.ArgumentParser()
Expand Down